Skip to content

Configuration

evolve reads at most one config file from the repository root:

  • .evolve.yaml
  • .evolve.yml
  • .evolve.json
  • .evolve.jsonc

Settings are layered, lowest to highest precedence:

  1. Built-in defaults
  2. The config file
  3. EVOLVE_* environment variables
  4. Explicit CLI flags

Common settings

Key Purpose
layout Force a repository layout (auto / single / multi / marketplace).
models Restrict which providers / model ids are available to run.
harnesses Restrict which agent CLIs (harnesses) may drive models.
cache_dir Directory holding the token-count cache.
results_format json, jsonc or yaml for stored results and rollups.
max_turns Default per-case turn cap for behavioral evals.
stale_results keep or drop results outside the active models set.
checks.* Every rule the static checks apply.
report.thresholds.* Pass-rate gates for report --check.
providers.<name>.models Replace a provider's builtin model matrix.
# yaml-language-server: $schema=https://raw.githubusercontent.com/bitwise-media-group/evolve/main/docs/config/config.schema.json
# evolve configuration. Each uncommented value is the built-in default;
# commented-out entries show example values for settings that default to unset.
# Generated by `evolve docs --format config`.

# Layout of the repository: auto, marketplace, multi, or single.
layout: 'auto'

# Limits the set of models that exist. Entries are provider ids, canonical
# model ids (anthropic/claude-sonnet-4-6), or all; anything unlisted is
# unavailable, and --model filters within this set.
# Default: unset — every model runnable by an available harness.
# models: ['anthropic/claude-sonnet-4-6']

# Limits which agent CLIs (claude, codex, gemini, cursor, copilot,
# antigravity) may drive models; --harness filters within this set.
# Default: unset — every harness found on PATH.
# harnesses: ['claude', 'copilot']

# Location of the token-count cache directory.
# Default: unset — the OS user cache dir.
# cache_dir: '~/.cache/evolve'

# Serialization used for committed results files and the EVALUATION rollup:
# json, jsonc, or yaml.
results_format: 'json'

telemetry:
  # Output directory of the OpenTelemetry JSON exporter (traces.json,
  # metrics.json, logs.json). The --telemetry-dir flag overrides this
  # setting, and both take precedence over OTEL_* env vars.
  # Default: unset — telemetry disabled.
  # dir: './telemetry'

# Turn cap applied to each behavioral eval by default; --max-turns and a
# per-eval max_turns both override it.
max_turns: 20

# Also benchmark every eval without the skill (the skill's lift over no
# skill); recomputed only when the eval or its fixtures change. --baseline
# overrides for one run.
baseline: true

# What run/report do with stored results for models outside the `models`
# restriction: keep or drop. --stale-results overrides.
# Default: unset — prompt on a terminal, otherwise keep.
# stale_results: 'keep'

sandbox:
  # Restrict agent writes with an OS sandbox (sandbox-exec on macOS,
  # bubblewrap on Linux); --no-sandbox overrides for one run.
  enabled: true

  # Directories held read-only during agent runs, so an escaping agent
  # cannot modify other source repositories; the workspace stays writable.
  # Reads, the network, and tool caches outside these roots are unaffected.
  # Default: unset — the parent directory of the repository under test.
  # protected_roots: ['~/Repos']

checks:
  # License that every SKILL.md must declare; when unset, skills must not
  # declare one.
  # Default: unset — the license field is forbidden.
  # license: 'MIT'

  # Regex that every skill description must match.
  description_pattern: 'Use (when|after|before)'

  # Upper bound on the SKILL.md line count.
  max_skill_lines: 500

  # Target SKILL.md line count for the advisory size signal (full at or
  # below; zero at the cap).
  ideal_skill_lines: 200

  # Emit the advisory skill-quality signals once run checks finish; the
  # --no-signals flag forces them off.
  signals: true

  # Demand .codex-plugin/plugin.json beside Claude's manifest.
  require_codex_manifest: true

  # Disallow a hooks/ directory in plugins.
  forbid_hooks: true

  # Validate marketplace manifests (marketplace layout only).
  marketplace: true

report:
  thresholds:
    # Lowest acceptable triggers pass rate (0-1); report --check exits 1
    # below it.
    # Default: unset — no gate.
    # triggers_min_pass_rate: 0.8

    # Lowest acceptable evals pass rate (0-1); report --check exits 1
    # below it.
    # Default: unset — no gate.
    # evals_min_pass_rate: 0.9

    # Model keys (provider/model-id) that the thresholds apply to.
    # Default: unset — every model with stored results.
    # models: ['anthropic/claude-fable-5']

# Per-provider overrides. providers.<name>.models replaces the builtin model
# matrix of that provider (model ids, display names, USD-per-mtok pricing).
# Default: unset — every provider keeps its builtin models.
# providers:
#   cursor:
#     models:
#       - id: 'composer-2.5'
#         display: 'Cursor Composer 2.5'
#         input_per_mtok: 3.0
#         output_per_mtok: 15.0
// evolve configuration — every uncommented value below is the built-in
// default; commented-out entries show example values for settings that
// default to unset.
// Generated by `evolve docs --format config`.
{
  "$schema": "https://raw.githubusercontent.com/bitwise-media-group/evolve/main/docs/config/config.schema.json",

  // Repository layout: auto, marketplace, multi, or single.
  "layout": "auto",

  // Restriction on which models exist: provider ids, canonical model ids
  // (anthropic/claude-sonnet-4-6), or all. Unlisted models are unavailable.
  // --model filters within it.
  // Default: unset — every model runnable by an available harness.
  // "models": ["anthropic/claude-sonnet-4-6"],

  // Restriction on which agent CLIs (claude, codex, gemini, cursor, copilot,
  // antigravity) may drive models. --harness filters within it.
  // Default: unset — every harness found on PATH.
  // "harnesses": ["claude", "copilot"],

  // Directory holding the token-count cache.
  // Default: unset — the OS user cache dir.
  // "cache_dir": "~/.cache/evolve",

  // Format for committed results files and the EVALUATION rollup: json,
  // jsonc, or yaml.
  "results_format": "json",

  "telemetry": {
    // Directory for the OpenTelemetry JSON exporter (traces.json,
    // metrics.json, logs.json); the --telemetry-dir flag overrides it and both
    // win over OTEL_* env vars.
    // Default: unset — telemetry disabled.
    // "dir": "./telemetry",
  },

  // Default maximum agent turns per behavioral eval; --max-turns and a
  // per-eval max_turns override it.
  "max_turns": 20,

  // Benchmark each eval without the skill (the skill's lift over no skill),
  // recomputed only when the eval or its fixtures change. --baseline
  // overrides for one run.
  "baseline": true,

  // How run/report treat stored results for models outside the `models`
  // restriction: keep or drop. --stale-results overrides.
  // Default: unset — prompt on a terminal, otherwise keep.
  // "stale_results": "keep",

  "sandbox": {
    // Confine agent writes with an OS sandbox (sandbox-exec on macOS,
    // bubblewrap on Linux); --no-sandbox overrides for one run.
    // The trailing comma keeps this object valid when protected_roots
    // below is uncommented.
    "enabled": true,

    // Directories kept read-only to agent runs so an escaping agent cannot
    // modify other source repositories; the workspace stays writable. Reads,
    // the network, and tool caches outside these roots are unaffected.
    // Default: unset — the parent directory of the repository under test.
    // "protected_roots": ["~/Repos"],
  },

  "checks": {
    // License every SKILL.md must declare; when unset, skills must not declare
    // one.
    // Default: unset — the license field is forbidden.
    // "license": "MIT",

    // Regex every skill description must match.
    "description_pattern": "Use (when|after|before)",

    // Maximum SKILL.md line count.
    "max_skill_lines": 500,

    // Ideal SKILL.md line count for the advisory size signal (full at or
    // below; zero at the cap).
    "ideal_skill_lines": 200,

    // Emit the advisory skill-quality signals after run checks; the
    // --no-signals flag forces them off.
    "signals": true,

    // Require .codex-plugin/plugin.json beside Claude's manifest.
    "require_codex_manifest": true,

    // Forbid a hooks/ directory in plugins.
    "forbid_hooks": true,

    // Validate marketplace manifests (marketplace layout only).
    "marketplace": true
  },

  "report": {
    "thresholds": {
      // Minimum triggers pass rate (0-1); report --check exits 1 below it.
      // Default: unset — no gate.
      // "triggers_min_pass_rate": 0.8,

      // Minimum evals pass rate (0-1); report --check exits 1 below it.
      // Default: unset — no gate.
      // "evals_min_pass_rate": 0.9,

      // Model keys (provider/model-id) the thresholds apply to.
      // Default: unset — every model with stored results.
      // "models": ["anthropic/claude-fable-5"],
    }
  },

  // Per-provider overrides: providers.<name>.models replaces that provider's
  // builtin model matrix (model ids, display names, USD-per-mtok pricing).
  // The comma after "report" above lets this block be uncommented as-is.
  // Default: unset — every provider keeps its builtin models.
  // "providers": {
  //   "cursor": {
  //     "models": [
  //       { "id": "composer-2.5", "display": "Cursor Composer 2.5", "input_per_mtok": 3.0, "output_per_mtok": 15.0 }
  //     ]
  //   }
  // }
}

Both examples above are generated and committed under docs/config/; the same structure applies in every supported format (.evolve.json and .evolve.yml too), and JSONC additionally tolerates comments and trailing commas. The full generated reference follows below. All of it is regenerated by make docs.

Repository layouts

evolve auto-detects three shapes (override with --layout):

Layout Marker Skills Evals
single .claude-plugin/plugin.json skills/<skill>/ evals/<skill>/
multi plugins/*/.claude-plugin/plugin.json plugins/<p>/skills/<skill>/ plugins/<p>/evals/<skill>/
marketplace .claude-plugin/marketplace.json at root plugins/<p>/skills/<skill>/ plugins/<p>/evals/<skill>/

A multi repo is a marketplace repo without marketplace manifests — marketplace checks are skipped. In a single repo the repository root is the plugin.

Configuration Reference

Key Type Default Description
layout string "auto" Repository layout: auto, marketplace, multi, or single.
models list of strings unset — every model runnable by an available harness Restriction on which models exist: provider ids, canonical model ids (anthropic/claude-sonnet-4-6), or all. Unlisted models are unavailable. --model filters within it.
harnesses list of strings unset — every harness found on PATH Restriction on which agent CLIs (claude, codex, gemini, cursor, copilot, antigravity) may drive models. --harness filters within it.
cache_dir string unset — the OS user cache dir Directory holding the token-count cache.
results_format string "json" Format for committed results files and the EVALUATION rollup: json, jsonc, or yaml.
telemetry.dir string unset — telemetry disabled Directory for the OpenTelemetry JSON exporter (traces.json, metrics.json, logs.json); the --telemetry-dir flag overrides it and both win over OTEL_* env vars.
max_turns int 20 Default maximum agent turns per behavioral eval; --max-turns and a per-eval max_turns override it.
baseline bool true Benchmark each eval without the skill (the skill's lift over no skill), recomputed only when the eval or its fixtures change. --baseline overrides for one run.
stale_results string unset — prompt on a terminal, otherwise keep How run/report treat stored results for models outside the models restriction: keep or drop. --stale-results overrides.
sandbox.enabled bool true Confine agent writes with an OS sandbox (sandbox-exec on macOS, bubblewrap on Linux); --no-sandbox overrides for one run.
sandbox.protected_roots list of strings unset — the parent directory of the repository under test Directories kept read-only to agent runs so an escaping agent cannot modify other source repositories; the workspace stays writable. Reads, the network, and tool caches outside these roots are unaffected.
checks.license string unset — the license field is forbidden License every SKILL.md must declare; when unset, skills must not declare one.
checks.description_pattern string "Use (when|after|before)" Regex every skill description must match.
checks.max_skill_lines int 500 Maximum SKILL.md line count.
checks.ideal_skill_lines int 200 Ideal SKILL.md line count for the advisory size signal (full at or below; zero at the cap).
checks.signals bool true Emit the advisory skill-quality signals after run checks; the --no-signals flag forces them off.
checks.require_codex_manifest bool true Require .codex-plugin/plugin.json beside Claude's manifest.
checks.forbid_hooks bool true Forbid a hooks/ directory in plugins.
checks.marketplace bool true Validate marketplace manifests (marketplace layout only).
report.thresholds.triggers_min_pass_rate float unset — no gate Minimum triggers pass rate (0-1); report --check exits 1 below it.
report.thresholds.evals_min_pass_rate float unset — no gate Minimum evals pass rate (0-1); report --check exits 1 below it.
report.thresholds.models list of strings unset — every model with stored results Model keys (provider/model-id) the thresholds apply to.