diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index ba4fac44..2e833ddc 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.4" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 784f4b5f..fd77b933 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.4", + "version": "1.16.0", "author": { "name": "PolyArch" }, diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 976f54fd..3298b26c 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -3,7 +3,7 @@ This is a Claude Code plugin that provides iterative development with Codex revi # Humanize Project Rules - Everything about this project, including but not limited to implementations, comments, tests and documentations should be in English. No Emoji or CJK char is allowed. -- If under `main` branch, every commit MUST include a version bump in `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` and `README.md` (the "Current Version" line). If not under `main` branch, please make sure that the current branch's `version` in those three files has a incremental update compared to that of `main` branch. The `version` must be identical in those three files. 
+- If a version bump is required, bump the version in all three files: `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` and `README.md` (the "Current Version" line). - Version number must be in format of `X.Y.Z` where X/Y/Z is numeric number. Version MUST NOT include anything other than `X.Y.Z`. For example, a good version is `9.732.42`; Bad version examples (MUST NOT USE): `3.22.7-alpha` (extra "-alpha" string), `9.77.2 (2026-01-07)` (useless date/timestamp). - The plan template in `commands/gen-plan.md` (Phase 5 Plan Structure section) and `prompt-template/plan/gen-plan-template.md` are intentionally kept in sync. When modifying either file, ensure both are updated to maintain consistency. - Conversely, changes to `prompt-template/plan/gen-plan-template.md` must also be reflected in the Plan Structure section of `commands/gen-plan.md`. diff --git a/.gitignore b/.gitignore index 8ef0d573..e5bcf34c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,12 @@ temp # Local Claude client settings /.claude/settings.json +/.claude/scheduled_tasks.lock # Humanize state directories (runtime-generated, project-local) .humanize/ +.claude-flow/ +.swarm/ # Python cache __pycache__/ diff --git a/README.md b/README.md index 69d587d3..05f2fdd3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.4** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. @@ -25,6 +25,7 @@ A Claude Code plugin that provides iterative development with independent AI rev The loop has two phases: **Implementation** (Claude works, Codex reviews summaries) and **Code Review** (Codex checks code quality with severity markers). Issues feed back into implementation until resolved. + ## Install ```bash @@ -40,25 +41,39 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ ## Quick Start -1. **Generate a plan** from your draft: +1. 
**Generate an idea draft** from a loose thought (optional — skip if you already have a draft): + ```bash + /humanize:gen-idea "add undo/redo to the editor" + ``` + Output goes to `.humanize/ideas/-.md` by default. Pass a `.md` path to expand existing rough notes. `--n` controls how many parallel directions explore the idea (default 6). + +2. **Generate a plan** from your draft: ```bash /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. **Refine an annotated plan** before implementation when reviewers add `CMT:` ... `ENDCMT` comments: +3. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `<cmt>` ... `</cmt>`, or `<comment>` ... `</comment>`): ```bash /humanize:refine-plan --input docs/plan.md ``` -3. **Run the loop**: +4. **Run the loop**: ```bash /humanize:start-rlcr-loop docs/plan.md ``` -4. **Monitor progress**: +5. **Consult Gemini** for deep web research (requires Gemini CLI): + ```bash + /humanize:ask-gemini What are the latest best practices for X? + ``` + +6. **Monitor progress (in another terminal, not inside Claude Code)**: ```bash - source /scripts/humanize.sh - humanize monitor rlcr + source /scripts/humanize.sh # Or just add it into your .bashrc or .zshrc + humanize monitor rlcr # RLCR loop + humanize monitor skill # All skill invocations (codex + gemini) + humanize monitor codex # Codex invocations only + humanize monitor gemini # Gemini invocations only ``` ## Monitor Dashboard diff --git a/commands/cancel-pr-loop.md b/commands/cancel-pr-loop.md deleted file mode 100644 index 9f7b5bd4..00000000 --- a/commands/cancel-pr-loop.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -description: "Cancel active PR loop" -allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh)", "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh --force)"] -disable-model-invocation: true ---- - -# Cancel PR Loop - -To cancel the active PR loop: - -1. 
Run the cancel script: - -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh" -``` - -2. Check the first line of output: - - **NO_LOOP** or **NO_ACTIVE_LOOP**: Say "No active PR loop found." - - **CANCELLED**: Report the cancellation message from the output - -**Key principle**: The script handles all cancellation logic. A PR loop is active if `state.md` exists in the newest PR loop directory (.humanize/pr-loop/). - -The loop directory with comments, resolution summaries, and state information will be preserved for reference. - -**Note**: This command only affects PR loops. RLCR loops (.humanize/rlcr/) are not affected. Use `/humanize:cancel-rlcr-loop` to cancel RLCR loops. diff --git a/commands/cancel-rlcr-loop.md b/commands/cancel-rlcr-loop.md index abea26ad..4f9e1209 100644 --- a/commands/cancel-rlcr-loop.md +++ b/commands/cancel-rlcr-loop.md @@ -17,6 +17,7 @@ To cancel the active loop: 2. Check the first line of output: - **NO_LOOP** or **NO_ACTIVE_LOOP**: Say "No active RLCR loop found." - **CANCELLED**: Report the cancellation message from the output + - **CANCELLED_METHODOLOGY_ANALYSIS**: Report the cancellation message from the output - **CANCELLED_FINALIZE**: Report the cancellation message from the output - **FINALIZE_NEEDS_CONFIRM**: The loop is in Finalize Phase. Continue to step 3 @@ -33,6 +34,6 @@ To cancel the active loop: - **If user chooses "No, let it finish"**: - Report: "Understood. The Finalize Phase will continue. Once complete, the loop will end normally." -**Key principle**: The script handles all cancellation logic. A loop is active if `state.md` (normal loop) or `finalize-state.md` (Finalize Phase) exists in the newest loop directory. +**Key principle**: The script handles all cancellation logic. A loop is active if `state.md` (normal loop), `methodology-analysis-state.md` (Methodology Analysis Phase), or `finalize-state.md` (Finalize Phase) exists in the newest loop directory. 
The loop directory with summaries, review results, and state information will be preserved for reference. diff --git a/commands/gen-idea.md b/commands/gen-idea.md new file mode 100644 index 00000000..2ef61e82 --- /dev/null +++ b/commands/gen-idea.md @@ -0,0 +1,209 @@ +--- +description: "Generate a repo-grounded idea draft via directed-swarm exploration" +argument-hint: " [--n ] [--output ]" +allowed-tools: + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh:*)" + - "Read" + - "Glob" + - "Grep" + - "Task" + - "Write" +--- + +# Generate Idea Draft from Loose Input + +Read and execute below with ultrathink. + +## Hard Constraint: Draft-Only Output + +This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4; prerequisite directory creation for the default `.humanize/ideas/` path by the validation script is permitted as part of that write. All exploration subagents run read-only. + +This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop each, the lead synthesizes a draft with one primary direction plus N-1 alternatives. Each direction carries objective evidence from the repo. + +## Workflow Overview + +> **Sequential Execution Constraint**: All phases MUST execute strictly in order. Each phase fully completes before the next. + +1. Parse Input +2. IO Validation +3. Direction Generation +4. Parallel Exploration +5. Synthesis and Write + +--- + +## Phase 0: Parse Input + +Extract from `$ARGUMENTS`: +- First positional: inline idea text or path to a `.md` file (required). +- `--n `: number of directions. Default 6. +- `--output `: target draft path. Default resolved by the validation script. + +Do not interpret or rewrite the idea text here. 
Pass `$ARGUMENTS` through to Phase 1 unchanged. + +--- + +## Phase 1: IO Validation + +Run: +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh" $ARGUMENTS +``` + +Handle exit codes: +- `0`: Parse stdout to extract `INPUT_MODE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). When `INPUT_MODE` is `file`, stdout additionally contains an `IDEA_BODY_FILE: ` line; extract that too. Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) +- `1`: Report "Missing or empty idea input" and stop. +- `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. +- `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. +- `4`: Report "Output file already exists — choose a different path" and stop. +- `5`: Report "No write permission to output directory" and stop. +- `6`: Report "Invalid arguments" with the stdout usage text and stop. +- `7`: Report "Template file missing — plugin configuration error" and stop. + +Before `VALIDATION_SUCCESS`, stdout may contain one or more lines starting with `WARNING:` (for example, `WARNING: short idea ( chars); proceeding` when an inline idea is under 10 characters). Surface these warnings to the user in your final report but continue Phase 2 normally. `WARNING:` lines are informational, not errors. + +Obtain the idea body into memory as `IDEA_BODY`, based on `INPUT_MODE`: +- `inline`: stdout contains a sentinel block at the end of the success output; extract all text between the `=== IDEA_BODY_BEGIN ===` and `=== IDEA_BODY_END ===` lines (exclusive). The script emits a trailing newline after the last body line. +- `file`: read the full contents of `IDEA_BODY_FILE` using the `Read` tool. + +Preserve byte-identical content in memory for later phases. 
No on-disk tempfile is created in inline mode — the stdout sentinel block is the authoritative source. + +--- + +## Phase 2: Direction Generation + +Generate exactly `N` orthogonal directions for exploring the idea. + +### Context to Gather + +Before generating directions, read (paths relative to the project root, which is `$(git rev-parse --show-toplevel)`): +- `README.md` at the project root. +- `CLAUDE.md` at the project root (if it exists). +- `.claude/CLAUDE.md` (if it exists). +- Top-level directory listing via `Glob` with pattern `*` (one level, no recursion). + +This context grounds the directions in the actual repo rather than generic brainstorming. + +### Generation Rules + +Produce exactly `N` direction entries. Each entry has: +- `name`: a 2-5 word short label. +- `rationale`: a single sentence explaining why this angle is distinct from the other directions. + +Hard constraint: **orthogonality**. Two near-duplicate directions defeat the directed-diversity premise. Before returning: +- If two directions feel like dupes, replace one with a genuinely different angle. +- If a direction collapses to "just do X better" with no angle distinction, replace it. +- Do not emit directions that merely restate the idea in different words. + +### Retry and Degradation + +- If the first pass returns fewer than `N` entries, regenerate once with an explicit "you MUST produce `N` orthogonal directions" instruction. +- If the second pass still returns fewer than `N` but at least 2, proceed with the reduced count and emit a warning to the user: `Warning: direction generation returned of requested directions; proceeding with reduced count.` +- If fewer than 2 directions are produced, stop with error: `direction generation degraded; retry.` + +Store the final direction list as `DIRECTIONS` (ordered; index 0..len-1). + +--- + +## Phase 3: Parallel Exploration + +Dispatch all directions in a **single Task-tool message** containing one Task invocation per direction. 
This is the W2S parallel-swarm step. + +### Subagent Invocation + +For each direction in `DIRECTIONS`, launch one `Explore` subagent. Each invocation prompt MUST include: + +1. A verbatim copy of the idea body (`IDEA_BODY`) captured in Phase 1. +2. The assigned direction (name + rationale). +3. The following instruction block (reproduce verbatim in the subagent prompt): + +> Explore this direction within the current repo. Gather OBJECTIVE EVIDENCE: +> - Specific repo paths with existing patterns worth extending. +> - Prior art or precedent in the codebase or adjacent tooling. +> - Measurable considerations (approximate complexity, LOC surface, performance implications) where discoverable from reading the code. +> +> Read-only. Do not write any files. +> +> If no concrete evidence exists for this direction, report the literal string `exploratory, no concrete precedent` once in OBJECTIVE_EVIDENCE and stop exploring further. Fabrication of references is forbidden. +> +> Return a structured proposal with exactly these fields: +> - `APPROACH_SUMMARY`: concrete design description (what to build, core mechanism, affected components). +> - `OBJECTIVE_EVIDENCE`: bullet list of repo paths, prior art, or the `exploratory, no concrete precedent` sentinel. +> - `KNOWN_RISKS`: short bullet list. +> - `CONFIDENCE`: one of `high`, `medium`, `low`. + +### Collection and Degradation + +Collect all subagent responses. For each response: +- Parse the four required fields. If a field is missing, mark that proposal as degraded and drop it. +- If fewer than 2 proposals survive, stop with error: `exploration phase degraded; retry.` +- Otherwise continue with the surviving proposals. + +Associate each surviving proposal with its originating direction (so Phase 4 can label it with the original direction name). When numbering alternatives in Phase 4 after any drops, renumber survivors sequentially as Alt-1..Alt-K (where K is the count of surviving non-primary directions). 
Do not preserve gaps from dropped proposals. + +--- + +## Phase 4: Synthesis and Write + +### Step 4.1: Pick the Primary Direction + +Review all surviving proposals. Choose the strongest as the primary based on: +1. Evidence density — more concrete repo references outrank fewer. +2. Fit with existing repo patterns — extending patterns outranks introducing unfamiliar paradigms. +3. Implementation surface area — prefer smaller surface where quality is otherwise comparable. +4. Declared `CONFIDENCE` — `high` > `medium` > `low` as tiebreaker. + +Record the chosen direction as `PRIMARY`; the remaining surviving directions become the Alt-1..Alt-K list (where K is the number of non-primary survivors, K ≤ N-1), numbered sequentially in their original direction order with no gaps for any dropped proposals. + +### Step 4.2: Infer Title + +Generate a 4-10 word Title Case title that captures the primary direction, not the original input phrasing verbatim. Example: idea `add undo/redo` with primary direction `command-pattern history` yields title `Command-Pattern Undo Stack For The Editor`. + +### Step 4.3: Populate the Template + +Read the template file located at `TEMPLATE_FILE` (from Phase 1 stdout). + +Produce the finalized draft content in memory by replacing placeholders: +- `<TITLE>` — the inferred title. +- `<ORIGINAL_IDEA>` — byte-identical value of `IDEA_BODY` captured in Phase 1. Preserve line breaks, trailing newline, and all formatting. Do NOT paraphrase or re-indent. +- `<PRIMARY_NAME>` — primary direction's short name. +- `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). +- `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. +- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. If the subagent returned only the literal sentinel `exploratory, no concrete precedent`, render it as a single bullet: `- exploratory, no concrete precedent`. 
+- `<PRIMARY_KNOWN_RISKS>` — primary proposal's `KNOWN_RISKS`, rendered as a bullet list. +- `<ALTERNATIVES>` — for each non-primary survivor at its Alt index `i` (1-based, sequential per Step 4.1), emit: + + ```markdown + ### Alt-<i>: <name> + - Gist: <one-paragraph summary derived from APPROACH_SUMMARY> + - Objective Evidence: + - <bullet from OBJECTIVE_EVIDENCE> + - ... + - Why not primary: <one sentence stating the tradeoff vs PRIMARY> + ``` + + Separate consecutive Alt entries with a single blank line. + +- `<SYNTHESIS_NOTES>` — one paragraph describing which elements from the alternatives could fold into the primary if the user chose a different direction. This is the lead's own synthesis note, not a subagent output. + +### Step 4.4: Write the Draft File + +Write the finalized content to `OUTPUT_FILE` using the `Write` tool. Single write; no progressive edits. + +### Step 4.5: Report + +Report to the user: +- Path written (`OUTPUT_FILE`). +- Primary direction name. +- Requested `N` and the actual direction count (note if reduced due to degradation). +- Next-step hint: `To turn this draft into a plan, run: /humanize:gen-plan --input <OUTPUT_FILE> --output <plan-path>`. + +--- + +## Error Handling + +- Phase 1 validation errors stop the command with a clear message. No partial output. +- Phase 2 degradation follows the retry-once + ≥2 minimum rule stated above. +- Phase 3 degradation follows the drop-and-continue + ≥2 minimum rule stated above. +- Never fabricate repo references or prior art. The `exploratory, no concrete precedent` sentinel from subagents is preserved verbatim in the draft. +- If any phase stops with an error, do not write a partial `OUTPUT_FILE`. diff --git a/commands/refine-plan.md b/commands/refine-plan.md index fc129016..0d97142f 100644 --- a/commands/refine-plan.md +++ b/commands/refine-plan.md @@ -34,7 +34,7 @@ The refined plan MUST reuse the existing `gen-plan` schema. Do not invent new to 1. 
**Execution Mode Setup**: Parse CLI arguments and derive output paths 2. **Load Project Config**: Resolve `alternative_plan_language` and mode defaults using `config-loader.sh` semantics 3. **IO Validation**: Run `validate-refine-plan-io.sh` -4. **Comment Extraction**: Scan the annotated plan and extract valid `CMT:` / `ENDCMT` blocks +4. **Comment Extraction**: Scan the annotated plan and extract valid comment blocks (`CMT:`/`ENDCMT`, `<cmt>`/`</cmt>`, `<comment>`/`</comment>`) 5. **Comment Classification**: Classify each extracted comment for downstream handling 6. **Comment Processing**: Answer questions, apply requested plan edits, and perform targeted research 7. **Plan Refinement**: Produce the comment-free refined plan while preserving the `gen-plan` structure @@ -167,7 +167,7 @@ Handle exit codes exactly: - Exit code 0: Continue to Phase 2 - Exit code 1: Report `Input file not found` and stop - Exit code 2: Report `Input file is empty` and stop -- Exit code 3: Report `Input file has no CMT:/ENDCMT blocks` and stop +- Exit code 3: Report `Input file has no comment blocks` and stop - Exit code 4: Report `Input file is missing required gen-plan sections` and stop - Exit code 5: Report `Output directory does not exist or is not writable - please fix it` and stop - Exit code 6: Report `QA directory is not writable` and stop @@ -196,17 +196,32 @@ Track these states while scanning the validated input in document order: Extraction rules: -1. Recognize `CMT:` as the start marker and `ENDCMT` as the end marker. -2. Support both inline and multi-line blocks: +1. Support three comment formats: + - Classic: `CMT:` as start marker and `ENDCMT` as end marker + - Short tag: `<cmt>` as start marker and `</cmt>` as end marker + - Long tag: `<comment>` as start marker and `</comment>` as end marker +2. 
Support both inline and multi-line blocks for all formats: - Inline: `Text before CMT: comment text ENDCMT text after` + - Inline: `Text before <cmt>comment text</cmt> text after` + - Inline: `Text before <comment>comment text</comment> text after` - Multi-line: ```markdown CMT: comment text ENDCMT ``` -3. Ignore `CMT:` and `ENDCMT` sequences inside fenced code blocks. -4. Ignore `CMT:` and `ENDCMT` sequences inside HTML comments. + ```markdown + <cmt> + comment text + </cmt> + ``` + ```markdown + <comment> + comment text + </comment> + ``` +3. Ignore comment markers inside fenced code blocks. +4. Ignore comment markers inside HTML comments. 5. Update `NEAREST_HEADING` whenever a Markdown heading is encountered outside fenced code and HTML comments. 6. Preserve surrounding non-comment text when removing inline comment blocks from the working plan text. 7. Assign raw comment IDs in document order as `CMT-1`, `CMT-2`, ... only for non-empty blocks. @@ -217,7 +232,7 @@ Extraction rules: For each non-empty comment block, capture: - `id` (`CMT-N`) -- `original_text` exactly as written between `CMT:` and `ENDCMT` +- `original_text` exactly as written between the comment markers - `normalized_text` with surrounding whitespace trimmed - `start_line`, `start_column` - `end_line`, `end_column` @@ -230,8 +245,8 @@ For each non-empty comment block, capture: These are fatal extraction errors: -1. Nested `CMT:` while already inside a comment block -2. `ENDCMT` encountered while not inside a comment block +1. Nested comment start marker while already inside a comment block +2. Comment end marker encountered while not inside a comment block or wrong end marker for the format 3. 
End of file reached while still inside a comment block Every fatal parse error MUST report: @@ -243,9 +258,9 @@ Every fatal parse error MUST report: Examples of acceptable messages: -- `Comment parse error: nested CMT block at line 48, column 3 near "## Acceptance Criteria" (context: "CMT: split AC-2...")` -- `Comment parse error: stray ENDCMT at line 109, column 1 near "## Task Breakdown" (context: "ENDCMT")` -- `Comment parse error: missing ENDCMT for block opened at line 72, column 5 near "## Dependencies and Sequence"` +- `Comment parse error: nested comment block at line 48, column 3 near "## Acceptance Criteria" (context: "<cmt>split AC-2...")` +- `Comment parse error: stray comment end marker at line 109, column 1 near "## Task Breakdown" (context: "</comment>")` +- `Comment parse error: missing end marker for block opened at line 72, column 5 near "## Dependencies and Sequence"` ### Outputs from Phase 2 @@ -403,7 +418,7 @@ Optional sections that MUST be preserved when present in the input: ### Refinement Rules -1. Remove every resolved `CMT:` / `ENDCMT` tag and all enclosed comment text from the refined plan. +1. Remove every resolved comment marker and all enclosed comment text from the refined plan. 2. Do not add any new top-level schema section. 3. Preserve `AC-X` / `AC-X.Y` formatting. 4. Preserve task IDs unless a comment explicitly requests a structural change. @@ -429,7 +444,7 @@ Rules: Before generating the QA document, verify: 1. All required sections are still present -2. No `CMT:` or `ENDCMT` markers remain +2. No comment markers remain 3. Every referenced `AC-*` exists 4. Every task dependency references an existing task ID or `-` 5. 
Every task row has exactly one valid routing tag: `coding` or `analyze` diff --git a/commands/start-pr-loop.md b/commands/start-pr-loop.md deleted file mode 100644 index aebdf207..00000000 --- a/commands/start-pr-loop.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -description: "Start PR review loop with bot monitoring" -argument-hint: "--claude|--codex [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS]" -allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-pr-loop.sh:*)"] ---- - -# Start PR Loop - -Execute the setup script to initialize the PR review loop: - -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/setup-pr-loop.sh" $ARGUMENTS -``` - -This command starts a PR review loop that: - -1. Detects the PR associated with the current branch -2. Fetches review comments from the specified bot(s) -3. You analyze and fix issues identified by the bot(s) -4. Push changes and trigger re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout) -6. Local Codex validates if remote concerns are valid or approved - -## Bot Flags (Required) - -At least one bot flag is required: -- `--claude` - Monitor reviews from claude[bot] (trigger with @claude) -- `--codex` - Monitor reviews from chatgpt-codex-connector[bot] (trigger with @codex) - -## Comment Prioritization - -Comments are processed in this order: -1. **Human comments first** - They always take precedence over bots -2. **Bot comments** - Newest comments analyzed first - -## Workflow - -1. Analyze PR comments and fix issues -2. Commit and push changes -3. Comment on PR to trigger re-review using the bot mentions shown in the prompt -4. Write resolution summary to the specified file -5. Try to exit - Stop Hook intercepts and polls for bot reviews -6. If issues remain, receive feedback and continue -7. If all bots approve, loop ends - -**Note:** The setup script provides the exact mention string to use (e.g., `@claude @codex`). 
-Use whatever bot mentions are shown in the initial prompt - they match the flags you provided. - -## Important Rules - -1. **Write summaries**: Always write your resolution summary to the specified file before exiting -2. **Push changes**: Your fixes must be pushed for bots to review them -3. **Tag bots**: Use the correct @mention format to trigger bot reviews -4. **No cheating**: Do not try to exit the loop by editing state files or running cancel commands -5. **Trust the process**: The Stop Hook manages polling and Codex validation - -## Stopping the Loop - -- Reach the maximum iteration count -- All monitored bots approve the changes -- User runs `/humanize:cancel-pr-loop` diff --git a/commands/start-rlcr-loop.md b/commands/start-rlcr-loop.md index 90a2e59e..f24fb156 100644 --- a/commands/start-rlcr-loop.md +++ b/commands/start-rlcr-loop.md @@ -1,6 +1,6 @@ --- description: "Start iterative loop with Codex review" -argument-hint: "[path/to/plan.md | --plan-file path/to/plan.md] [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] [--track-plan-file] [--push-every-round] [--base-branch BRANCH] [--full-review-round N] [--skip-impl] [--claude-answer-codex] [--agent-teams] [--yolo] [--skip-quiz]" +argument-hint: "[path/to/plan.md | --plan-file path/to/plan.md] [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] [--track-plan-file] [--push-every-round] [--base-branch BRANCH] [--full-review-round N] [--skip-impl] [--claude-answer-codex] [--agent-teams] [--yolo] [--skip-quiz] [--privacy]" allowed-tools: - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-rlcr-loop.sh:*)" - "Read" @@ -127,6 +127,15 @@ This command starts an iterative development loop where: 7. If code review finds issues (`[P0-9]` markers), you fix them and continue 8. 
When no issues are found, the loop ends with a Finalize Phase +## What Is a Round + +**One round = the agent believes the entire plan is finished.** A round boundary is when the agent writes a summary and attempts to exit, triggering Codex review. This is the fundamental semantic: + +- A round is NOT one task, one milestone, one stage, or one layer of the plan. +- If the plan has multiple stages or milestones, they are all completed within a single round before writing the round summary. +- Intermediate progress checks (e.g., verifying a stage before starting the next) should use manual `ask-codex` calls, not round boundaries. +- Only write `round-N-summary.md` and attempt to exit when you believe ALL tasks in the plan are done. + ## Goal Tracker System This loop uses a **Goal Tracker** to prevent goal drift across iterations: diff --git a/config/codex-hooks.json b/config/codex-hooks.json new file mode 100644 index 00000000..98d31c33 --- /dev/null +++ b/config/codex-hooks.json @@ -0,0 +1,17 @@ +{ + "description": "Humanize Codex Hooks - Native Stop hooks for RLCR loops", + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/loop-codex-stop-hook.sh", + "timeout": 7200, + "statusMessage": "humanize RLCR stop hook" + } + ] + } + ] + } +} diff --git a/config/default_config.json b/config/default_config.json index 6238dce2..af41af12 100644 --- a/config/default_config.json +++ b/config/default_config.json @@ -1,5 +1,5 @@ { - "codex_model": "gpt-5.4", + "codex_model": "gpt-5.5", "codex_effort": "high", "bitlesson_model": "haiku", "agent_teams": false, diff --git a/docs/bitlesson.md b/docs/bitlesson.md index bb2c3bac..01bb32e5 100644 --- a/docs/bitlesson.md +++ b/docs/bitlesson.md @@ -18,6 +18,10 @@ Provider routing is automatic: If the configured provider binary is missing, the selector falls back to the default Codex model so the loop can still proceed. 
+On Codex-only installs, Humanize writes `provider_mode: "codex-only"` into the user config. +When that mode is present, the selector forces BitLesson selection onto the Codex/OpenAI path +before provider resolution, even if an older default such as `haiku` would otherwise route to Claude. + ## Workflow Each project keeps its BitLesson knowledge base at `.humanize/bitlesson.md`. diff --git a/docs/install-for-claude.md b/docs/install-for-claude.md index 92cdb7e9..06b6991e 100644 --- a/docs/install-for-claude.md +++ b/docs/install-for-claude.md @@ -68,7 +68,6 @@ Then use: ```bash humanize monitor rlcr # Monitor RLCR loop -humanize monitor pr # Monitor PR loop ``` ## Other Install Guides diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index 87c754ce..2c70a1cc 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -1,6 +1,6 @@ # Install Humanize Skills for Codex -This guide explains how to install the Humanize skills for Codex skill runtime (`$CODEX_HOME/skills`). +This guide explains how to install Humanize for Codex CLI, including the skill runtime (`$CODEX_HOME/skills`) and the native Codex `Stop` hook (`$CODEX_HOME/hooks.json`). 
## Quick Install (Recommended) @@ -25,7 +25,13 @@ Or use the unified installer directly: This will: - Sync `humanize`, `humanize-gen-plan`, `humanize-refine-plan`, and `humanize-rlcr` into `${CODEX_HOME:-~/.codex}/skills` - Copy runtime dependencies into `${CODEX_HOME:-~/.codex}/skills/humanize` -- Use RLCR defaults: `codex exec` with `gpt-5.4:high`, `codex review` with `gpt-5.4:high` +- Install/update native Humanize Stop hooks in `${CODEX_HOME:-~/.codex}/hooks.json` +- Enable the experimental `codex_hooks` feature in `${CODEX_HOME:-~/.codex}/config.toml` when `codex` is available +- Seed `~/.config/humanize/config.json` with a Codex/OpenAI `bitlesson_model` when that key is not already set +- Mark the install as `provider_mode: "codex-only"` when using `--target codex` +- Use RLCR defaults: `codex exec` with `gpt-5.5:high`, `codex review` with `gpt-5.5:high` + +Requires Codex CLI `0.114.0` or newer for native hooks. Older Codex builds are not supported by the Codex install path. ## Verify @@ -58,6 +64,21 @@ Installed files/directories: - `${CODEX_HOME:-~/.codex}/skills/humanize/templates/` - `${CODEX_HOME:-~/.codex}/skills/humanize/config/` - `${CODEX_HOME:-~/.codex}/skills/humanize/agents/` +- `${CODEX_HOME:-~/.codex}/hooks.json` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` (created or updated only when Humanize config keys are unset) + +Verify native hooks: + +```bash +codex features list | rg codex_hooks +sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" +``` + +Expected: +- `codex_hooks` is `true` +- `hooks.json` contains `loop-codex-stop-hook.sh` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.5` +- for `--target codex`, `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` also contains `provider_mode: "codex-only"` ## Optional: Install for Both Codex and Kimi @@ -73,6 +94,9 @@ Installed files/directories: # Custom Codex skills dir 
./scripts/install-skills-codex.sh --codex-skills-dir /custom/codex/skills + +# Reinstall only the native hooks/config +./scripts/install-codex-hooks.sh ``` ## Troubleshooting @@ -82,3 +106,10 @@ If scripts are not found from installed skills: ```bash ls -la "${CODEX_HOME:-$HOME/.codex}/skills/humanize/scripts" ``` + +If native exit gating does not trigger: + +```bash +codex features enable codex_hooks +sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" +``` diff --git a/docs/install-for-kimi.md b/docs/install-for-kimi.md index ee217d70..c947ffac 100644 --- a/docs/install-for-kimi.md +++ b/docs/install-for-kimi.md @@ -169,7 +169,7 @@ Look for the "Skills" section in the help output. |--------|-------------|---------| | `path/to/plan.md` | Plan file path | Required (unless --skip-impl) | | `--max N` | Maximum iterations | 42 | -| `--codex-model MODEL:EFFORT` | Codex model | gpt-5.4:high | +| `--codex-model MODEL:EFFORT` | Codex model | gpt-5.5:high | | `--codex-timeout SECONDS` | Review timeout | 5400 | | `--base-branch BRANCH` | Base for code review | auto-detect | | `--full-review-round N` | Full alignment check interval | 5 | @@ -191,7 +191,7 @@ Ensure you have `codex` CLI installed: codex --version ``` -The skills will use `gpt-5.4` with `high` effort level by default. +The skills will use `gpt-5.5` with `high` effort level by default. ## Uninstall diff --git a/docs/usage.md b/docs/usage.md index a27a80cb..313ae7ce 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,7 +46,7 @@ The quiz is advisory, not a gate. You always have the option to proceed. But tha ```bash /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. If the plan is reviewed with `CMT:` ... `ENDCMT` annotations, refine it and generate a QA ledger: +2. If the plan is reviewed with comment annotations, refine it and generate a QA ledger: ```bash /humanize:refine-plan --input docs/plan.md ``` @@ -63,8 +63,6 @@ The quiz is advisory, not a gate. 
You always have the option to proceed. But tha | `/cancel-rlcr-loop` | Cancel active loop | | `/gen-plan --input <draft.md> --output <plan.md>` | Generate structured plan from draft | | `/refine-plan --input <annotated-plan.md>` | Refine an annotated plan and generate a QA ledger | -| `/start-pr-loop --claude\|--codex` | Start PR review loop with bot monitoring | -| `/cancel-pr-loop` | Cancel active PR loop | | `/ask-codex [question]` | One-shot consultation with Codex | ## Command Reference @@ -78,7 +76,7 @@ OPTIONS: --plan-file <path> Explicit plan file path (alternative to positional arg) --max <N> Maximum iterations before auto-stop (default: 42) --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, fallback gpt-5.4:high) + Codex model and reasoning effort (default from config, fallback gpt-5.5:high) --codex-timeout <SECONDS> Timeout for each Codex review in seconds (default: 5400) --track-plan-file Indicate plan file should be tracked in git (must be clean) @@ -127,7 +125,7 @@ Workflow: 5. Generates a structured plan.md with acceptance criteria 6. Optionally starts `/humanize:start-rlcr-loop` if `--auto-start-rlcr-if-converged` conditions are met -If reviewers later annotate the generated plan with `CMT:` ... `ENDCMT` blocks, run +If reviewers later annotate the generated plan with comment blocks, run `/humanize:refine-plan --input <plan.md>` before starting or resuming implementation. ### refine-plan @@ -169,9 +167,10 @@ how each comment was handled. **Annotated comment block format:** -`refine-plan` looks for reviewer comments wrapped in `CMT:` and `ENDCMT` markers. Both inline -and multi-line comment blocks are supported: +`refine-plan` supports three comment formats for reviewer annotations. 
Both inline +and multi-line comment blocks are supported in all formats: +**Classic format (CMT:/ENDCMT):** ```markdown Text before CMT: clarify why AC-3 is split here ENDCMT text after ``` @@ -183,11 +182,36 @@ If the dependency is unclear, add a pending decision instead of guessing. ENDCMT ``` +**Short tag format (`<cmt></cmt>`):** +```markdown +Text before <cmt>clarify why AC-3 is split here</cmt> text after +``` + +```markdown +<cmt> +Please investigate whether this task should depend on task4 or task5. +If the dependency is unclear, add a pending decision instead of guessing. +</cmt> +``` + +**Long tag format (`<comment></comment>`):** +```markdown +Text before <comment>clarify why AC-3 is split here</comment> text after +``` + +```markdown +<comment> +Please investigate whether this task should depend on task4 or task5. +If the dependency is unclear, add a pending decision instead of guessing. +</comment> +``` + Rules: -- At least one non-empty `CMT:` block must exist in the input file. -- `CMT:` and `ENDCMT` markers inside fenced code blocks or HTML comments are ignored. +- At least one non-empty comment block must exist in the input file. +- Comment markers inside fenced code blocks or HTML comments are ignored. - Empty comment blocks are removed but do not create QA ledger entries. - The input plan must still follow the `gen-plan` section schema. +- All three formats can be mixed within the same file. **QA output structure:** @@ -209,39 +233,6 @@ If `--alt-language` is set to a supported non-English language, the command also translated plan and QA variants by inserting `_<code>` before the file extension, such as `plan_zh.md` and `plan-qa_zh.md`.
-### start-pr-loop - -``` -/humanize:start-pr-loop --claude|--codex [OPTIONS] - -BOT FLAGS (at least one required): - --claude Monitor reviews from claude[bot] (trigger with @claude) - --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger with @codex) - -OPTIONS: - --max <N> Maximum iterations before auto-stop (default: 42) - --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, effort: medium) - --codex-timeout <SECONDS> - Timeout for each Codex review in seconds (default: 900) - -h, --help Show help message -``` - -The PR loop automates the process of handling GitHub PR reviews from remote bots: - -1. Detects the PR associated with the current branch -2. Fetches review comments from the specified bot(s) -3. Claude analyzes and fixes issues identified by the bot(s) -4. Pushes changes and triggers re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout per bot) -6. Local Codex validates if remote concerns are approved or have issues -7. 
Loop continues until all bots approve or max iterations reached - -**Prerequisites:** -- GitHub CLI (`gh`) must be installed and authenticated -- Codex CLI must be installed -- Current branch must have an associated open PR - ### ask-codex ``` @@ -249,7 +240,7 @@ The PR loop automates the process of handling GitHub PR reviews from remote bots OPTIONS: --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, fallback gpt-5.4:high) + Codex model and reasoning effort (default from config, fallback gpt-5.5:high) --codex-timeout <SECONDS> Timeout for the Codex query in seconds (default: 3600) -h, --help Show help message @@ -274,20 +265,21 @@ Current built-in keys: | Key | Default | Description | |-----|---------|-------------| -| `codex_model` | `gpt-5.4` | Shared default model for Codex-backed review and analysis | +| `codex_model` | `gpt-5.5` | Shared default model for Codex-backed review and analysis | | `codex_effort` | `high` | Shared default reasoning effort (`xhigh`, `high`, `medium`, `low`) | | `bitlesson_model` | `haiku` | Model used by the BitLesson selector agent | +| `provider_mode` | unset | Optional runtime mode hint such as `codex-only` | | `agent_teams` | `false` | Project-level default for agent teams workflow | | `alternative_plan_language` | `""` | Optional translated plan variant language; supported values include `Chinese`, `Korean`, `Japanese`, `Spanish`, `French`, `German`, `Portuguese`, `Russian`, `Arabic`, or ISO codes like `zh` | | `gen_plan_mode` | `discussion` | Default plan-generation mode | ### Codex Model Configuration -All Codex-using features (RLCR loop, PR loop, ask-codex) share the same model configuration: +All Codex-using features (RLCR loop, ask-codex) share the same model configuration: | Key | Default | Description | |-----|---------|-------------| -| `codex_model` | `gpt-5.4` | Model used for Codex operations (reviews, analysis, queries) | +| `codex_model` | `gpt-5.5` | Model used for Codex 
operations (reviews, analysis, queries) | | `codex_effort` | `high` | Reasoning effort (`xhigh`, `high`, `medium`, `low`) | To override, add to `.humanize/config.json`: @@ -300,11 +292,15 @@ To override, add to `.humanize/config.json`: } ``` +On Codex installs, Humanize also seeds `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` +with a Codex/OpenAI `bitlesson_model` and `provider_mode: "codex-only"` when those keys +are unset, so BitLesson selection stays on the Codex/OpenAI path without probing Claude. + Codex model is resolved with this precedence: 1. CLI `--codex-model` flag (highest priority) -2. Feature-specific defaults (e.g., PR loop defaults to `medium` effort) +2. Feature-specific defaults 3. Config-backed defaults from the 4-layer hierarchy above -4. Hardcoded fallback (`gpt-5.4:high`) +4. Hardcoded fallback (`gpt-5.5:high`) **Migration note**: If your `.humanize/config.json` contains the legacy keys `loop_reviewer_model` or `loop_reviewer_effort`, they are silently ignored. @@ -322,8 +318,6 @@ source ~/.claude/plugins/cache/PolyArch/humanize/<LATEST.VERSION>/scripts/humani # Monitor RLCR loop progress humanize monitor rlcr -# Monitor PR loop progress -humanize monitor pr ``` Progress data is stored in `.humanize/rlcr/<timestamp>/` for each loop session. @@ -331,7 +325,6 @@ Progress data is stored in `.humanize/rlcr/<timestamp>/` for each loop session. 
## Cancellation - **RLCR loop**: `/humanize:cancel-rlcr-loop` -- **PR loop**: `/humanize:cancel-pr-loop` ## Environment Variables diff --git a/hooks/check-todos-from-transcript.py b/hooks/check-todos-from-transcript.py index af577a5c..31ec6e5e 100755 --- a/hooks/check-todos-from-transcript.py +++ b/hooks/check-todos-from-transcript.py @@ -15,11 +15,26 @@ echo '{"session_id": "...", "transcript_path": "/path/to/transcript.jsonl"}' | python3 check-todos-from-transcript.py """ import json +import re import sys from pathlib import Path from typing import List, Tuple +LANE_PREFIX_PATTERN = re.compile(r"^\s*\[(mainline|blocking|queued)\](?:\s|$)", re.IGNORECASE) + + +def classify_lane(*parts: str) -> str: + """Infer the task lane from content, defaulting to blocking for safety.""" + for part in parts: + if not part: + continue + match = LANE_PREFIX_PATTERN.match(part) + if match: + return match.group(1).lower() + return "blocking" + + def extract_tool_calls_from_entry(entry: dict) -> List[Tuple[str, dict]]: """ Extract tool calls from a transcript entry. 
@@ -92,10 +107,14 @@ def find_incomplete_todos_from_transcript(transcript_path: Path) -> List[dict]: status = todo.get("status", "") content = todo.get("content", "") if status != "completed": + lane = classify_lane(content) + if lane == "queued": + continue incomplete.append({ "status": status, "content": content, "source": "todo", + "lane": lane, }) return incomplete @@ -134,11 +153,15 @@ def find_incomplete_tasks_from_directory(session_id: str, tasks_base_dir: str = description = task.get("description", "") task_id = task_file.stem # Filename without .json content = subject or description or f"Task {task_id}" + lane = classify_lane(subject, description) + if lane == "queued": + continue incomplete.append({ "status": status, "content": content, "source": "task", "task_id": task_id, + "lane": lane, }) except (json.JSONDecodeError, OSError): # Skip malformed or unreadable task files @@ -184,11 +207,13 @@ def main(): status = item.get("status", "unknown") content = item.get("content", "") source = item.get("source", "unknown") + lane = item.get("lane", "blocking") + lane_marker = f"[{lane}]" if source == "task": task_id = item.get("task_id", "?") - output_lines.append(f" - [{status}] (Task #{task_id}) {content}") + output_lines.append(f" - [{status}] {lane_marker} (Task #{task_id}) {content}") else: - output_lines.append(f" - [{status}] {content}") + output_lines.append(f" - [{status}] {lane_marker} {content}") # Output marker and incomplete items both to stdout print("INCOMPLETE_TODOS") diff --git a/hooks/hooks.json b/hooks/hooks.json index e25ebe30..dcb99266 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -1,5 +1,5 @@ { - "description": "Humanize Plugin Hooks - Validation hooks and Stop hooks for /start-rlcr-loop and /start-pr-loop", + "description": "Humanize Plugin Hooks - Validation hooks and Stop hooks for /start-rlcr-loop", "hooks": { "UserPromptSubmit": [ { @@ -67,11 +67,6 @@ "type": "command", "command": 
"${CLAUDE_PLUGIN_ROOT}/hooks/loop-codex-stop-hook.sh", "timeout": 7200 - }, - { - "type": "command", - "command": "${CLAUDE_PLUGIN_ROOT}/hooks/pr-loop-stop-hook.sh", - "timeout": 7200 } ] } diff --git a/hooks/lib/loop-bg-tasks.sh b/hooks/lib/loop-bg-tasks.sh new file mode 100755 index 00000000..08eba146 --- /dev/null +++ b/hooks/lib/loop-bg-tasks.sh @@ -0,0 +1,435 @@ +#!/usr/bin/env bash +# +# Background-task helpers for the RLCR stop hook. +# +# Owns all logic that inspects the Claude Code transcript to decide +# whether the hook should short-circuit (the main session is still +# waiting on an asynchronous Agent/Bash dispatch), plus the four guard +# blocks that the stop hook runs before its normal gate logic: +# +# 1. Ambiguous-caller marker guard +# 2. Cross-session parked-loop guard +# 3. Early exit: pending background tasks +# 4. Same-session stale-marker cleanup +# +# Depends on loop-common.sh (FIELD_SESSION_ID, resolve_active_state_file) +# being sourced first. +# + +# Source guard. +[[ -n "${_LOOP_BG_TASKS_LOADED:-}" ]] && return 0 2>/dev/null || true +_LOOP_BG_TASKS_LOADED=1 + +# Expand a leading "~" or "~/" in a path to "$HOME" without using eval. +# Only the bare "~" and "~/..." forms are expanded; "~user/..." and every +# other input (absolute path, relative path, empty string) is returned verbatim. +# +# Usage: expand_leading_tilde "$path" +# Prints the normalized path to stdout. +expand_leading_tilde() { + local path="$1" + case "$path" in + '~') printf '%s' "${HOME:-}" ;; + '~/'*) printf '%s/%s' "${HOME:-}" "${path#'~/'}" ;; + *) printf '%s' "$path" ;; + esac +} + +# Extract transcript_path from hook JSON input and expand any leading tilde. +# Usage: extract_transcript_path "$json_input" +# Outputs the transcript_path to stdout, or empty string if not available. 
+extract_transcript_path() { + local input="$1" + local raw + raw=$(printf '%s' "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") + expand_leading_tilde "$raw" +} + +# Convert an RLCR loop dir basename to a lexically-comparable ISO-8601 +# UTC timestamp suitable for filtering transcript events. +# +# `setup-rlcr-loop.sh` creates loop dirs named `YYYY-MM-DD_HH-MM-SS` in +# the system's LOCAL wall clock (it calls `date +%Y-%m-%d_%H-%M-%S` +# without `-u`). Claude transcript events carry actual UTC timestamps +# like `2026-04-16T13:19:26.819Z`. To compare them correctly, this +# helper converts the local wall-clock parse back to a real UTC moment +# via a two-step: parse local -> epoch seconds -> format in UTC. +# +# The `.000Z` suffix makes transcript timestamps from that same second +# (e.g. `.819Z`) compare >= the boundary under lexical string ordering. +# +# Usage: derive_loop_start_iso_ts "$loop_dir" +# Prints the ISO-8601 UTC timestamp, or empty string when the +# basename does not match the expected format or the local `date` +# binary cannot parse it. +derive_loop_start_iso_ts() { + local loop_dir="$1" + local base + base=$(basename "$loop_dir" 2>/dev/null || echo "") + if [[ ! "$base" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})-([0-9]{2})$ ]]; then + return + fi + local local_datetime + local_datetime="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}:${BASH_REMATCH[3]}:${BASH_REMATCH[4]}" + + # Local wall-clock -> epoch seconds. GNU `date -d` first, + # BSD/macOS `date -j -f ...` second. Both honour the caller's TZ + # for interpretation, matching setup-rlcr-loop.sh's behaviour at + # loop-dir creation time. + local epoch + epoch=$(date -d "$local_datetime" +%s 2>/dev/null) || epoch="" + if [[ -z "$epoch" ]]; then + epoch=$(date -j -f "%Y-%m-%d %H:%M:%S" "$local_datetime" +%s 2>/dev/null) || epoch="" + fi + if [[ -z "$epoch" ]]; then + return + fi + + # Epoch -> UTC ISO-8601. Try GNU then BSD.
+ local utc_iso + utc_iso=$(date -u -d "@$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" + if [[ -z "$utc_iso" ]]; then + utc_iso=$(date -u -r "$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" + fi + printf '%s' "$utc_iso" +} + +# Derive the Claude Code task-output directory from a transcript path. +# +# Claude Code writes background-task output files under: +# /tmp/claude-<uid>/<project-slug>/<session-id>/tasks/<task-id>.output +# +# The project slug and session id are encoded in the transcript path: +# <claude-home>/projects/<slug>/<session-id>.jsonl +# +# Usage: derive_tasks_dir_from_transcript "$transcript_path" +# Prints the tasks dir path, or nothing when derivation fails. +derive_tasks_dir_from_transcript() { + local transcript_path="$1" + [[ -z "$transcript_path" ]] && return + local slug sid uid + slug=$(basename "$(dirname "$transcript_path")" 2>/dev/null) + sid=$(basename "$transcript_path" .jsonl 2>/dev/null) + uid=$(id -u 2>/dev/null) || return + if [[ -z "$slug" ]] || [[ "$slug" == "." ]] || [[ -z "$sid" ]] || [[ -z "$uid" ]]; then + return + fi + printf '/tmp/claude-%s/%s/%s/tasks' "$uid" "$slug" "$sid" +} + +# Returns 0 if the background task identified by task_id appears to be alive +# (output file absent, or lsof reports >= 1 holder), 1 if confirmed dead +# (output file exists and lsof reports 0 holders). +# +# Fail-open: returns 0 (alive) when the output file does not exist or +# the lsof binary is unavailable. Once lsof runs, its exit status is +# returned as-is, so any non-zero exit (lsof errors included) reads as dead. +# +# Set LSOF_BIN to override the lsof binary path (used in tests). +# +# Usage: is_bg_task_alive "$task_id" "$tasks_dir" +is_bg_task_alive() { + local task_id="$1" tasks_dir="$2" + local lsof_bin="${LSOF_BIN:-lsof}" + local output_file="$tasks_dir/$task_id.output" + # Output file absent -> fail open (treat as still running). + [[ -f "$output_file" ]] || return 0 + # lsof unavailable -> fail open.
+ command -v "$lsof_bin" >/dev/null 2>&1 || return 0 + # lsof exits 0 when >= 1 process has the file open, 1 otherwise. + "$lsof_bin" "$output_file" >/dev/null 2>&1 +} + +# Filter a newline-delimited list of task IDs, retaining only those that +# pass is_bg_task_alive. Prints surviving IDs one per line. +# +# Usage: prune_dead_bg_task_ids "$pending_ids" "$tasks_dir" +prune_dead_bg_task_ids() { + local pending_ids="$1" tasks_dir="$2" + local task_id + while IFS= read -r task_id; do + [[ -z "$task_id" ]] && continue + is_bg_task_alive "$task_id" "$tasks_dir" && printf '%s\n' "$task_id" + done <<< "$pending_ids" +} + +# Enumerate background-task ids that have been launched but not yet marked +# completed in a Claude Code transcript.jsonl. +# +# Launch events (inspected in tool_result "user" messages): +# - Background subagent: toolUseResult.isAsync == true +# -> id is toolUseResult.agentId +# - Background shell: toolUseResult.backgroundTaskId non-empty +# -> id is toolUseResult.backgroundTaskId +# +# Completion events are recognised from two Claude Code transcript forms: +# +# 1. Structured SDK record +# (see SDKTaskNotificationMessage in docs/typescript.md): +# `type == "system"`, `subtype == "task_notification"`, +# `task_id` is the completed id. Any `status` value +# (completed, failed, stopped, ...) is treated as terminal. +# +# 2. Legacy queue-operation enqueue whose `content` embeds a +# `<task-notification>` XML block with `<task-id>...</task-id>`; +# kept for transcripts produced by older Claude Code versions. +# +# pending := launched \ completed +# +# Optional second argument `since_ts` (ISO-8601 string, e.g. the value +# returned by `derive_loop_start_iso_ts`): when provided, only launch +# events whose top-level `.timestamp` field is >= `since_ts` count as +# candidate launches. Events without a `.timestamp` are included (keeps +# fixture transcripts and older record formats working). 
This keeps +# pre-loop session-wide background work from pinning an RLCR loop that +# has no pending work of its own. +# +# Usage: list_pending_background_task_ids "$transcript_path" [since_ts] +# - Outputs one id per line on stdout (possibly empty). +# - Returns 0 when the transcript is readable (including when there are +# no pending tasks). Returns 1 when the transcript path is empty, not +# a regular file, or jq is unavailable, so callers must treat non-zero +# as "unknown -> do not short-circuit". +list_pending_background_task_ids() { + local transcript_path="$1" + local since_ts="${2:-}" + + # Normalize a leading tilde so direct callers (tests, ad-hoc scripts) + # work correctly even when transcript_path was not routed through + # extract_transcript_path. + transcript_path=$(expand_leading_tilde "$transcript_path") + + if [[ -z "$transcript_path" ]] || [[ ! -f "$transcript_path" ]]; then + return 1 + fi + if ! command -v jq >/dev/null 2>&1; then + return 1 + fi + + local launched completed + launched=$(jq -r --arg since_ts "$since_ts" ' + select(.toolUseResult != null) + | select( + ($since_ts == "" + or ((.timestamp // "") == "") + or ((.timestamp // "") >= $since_ts)) + ) + | select( + (.toolUseResult.isAsync == true and (.toolUseResult.agentId // "") != "") + or ((.toolUseResult.backgroundTaskId // "") != "") + ) + | (.toolUseResult.agentId // .toolUseResult.backgroundTaskId) + ' "$transcript_path" 2>/dev/null | sort -u) || return 1 + + # Union of both completion formats. Either source alone is enough to + # mark a launched id terminal. + # + # The `grep -oE || true` guard on the legacy branch keeps `set -o + # pipefail` from poisoning the combined pipeline when no legacy + # queue-operation records exist in the transcript (grep with `-o` + # exits 1 on no matches, which would otherwise wipe out any SDK + # task_notification results collected above). 
+ completed=$( + { + jq -r ' + select(.type == "system" and .subtype == "task_notification") + | (.task_id // empty) + ' "$transcript_path" 2>/dev/null + jq -r ' + select(.type == "queue-operation" and .operation == "enqueue") + | (.content // "" | tostring) + | select(contains("<task-notification>")) + ' "$transcript_path" 2>/dev/null \ + | { grep -oE '<task-id>[^<]+</task-id>' || true; } \ + | sed -E 's|</?task-id>||g' + } | sort -u | sed '/^$/d' + ) || completed="" + + # Collect launched ids that have no matching completion notification. + local pending + pending=$(comm -23 \ + <(printf '%s\n' "$launched" | sed '/^$/d') \ + <(printf '%s\n' "$completed" | sed '/^$/d')) + + # Apply liveness probe: drop orphaned task IDs whose output file exists + # but has zero open file descriptors (killed without a completion event). + if [[ -n "$pending" ]]; then + local tasks_dir + tasks_dir=$(derive_tasks_dir_from_transcript "$transcript_path") + if [[ -n "$tasks_dir" ]]; then + pending=$(prune_dead_bg_task_ids "$pending" "$tasks_dir") + fi + fi + + printf '%s\n' "$pending" | sed '/^$/d' +} + +# Returns 0 when the transcript shows at least one pending background task. +# Returns 1 when no pending tasks are detected (including fail-closed cases +# like missing transcript, non-file path, or jq unavailable). +# +# Usage: has_pending_background_tasks "$transcript_path" [since_ts] +has_pending_background_tasks() { + local transcript_path="$1" + local since_ts="${2:-}" + local pending + pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || return 1 + [[ -n "$pending" ]] +} + +# Prints the count of pending background tasks to stdout. Prints 0 for any +# error case so callers can still format messages safely. 
+# +# Usage: count_pending_background_tasks "$transcript_path" [since_ts] +count_pending_background_tasks() { + local transcript_path="$1" + local since_ts="${2:-}" + local pending + pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || { + echo 0 + return 0 + } + if [[ -z "$pending" ]]; then + echo 0 + else + printf '%s\n' "$pending" | sed '/^$/d' | wc -l | tr -d ' ' + fi +} + +# Single entry point for the stop hook: runs the four guard blocks +# (ambiguous-caller, cross-session parked, pending-bg short-circuit, +# same-session stale-marker cleanup) in order. When a guard decides to +# short-circuit the stop hook, it emits the appropriate JSON on stdout +# and `exit 0`s directly; the caller (sourcing the hook script) never +# returns. When no guard fires, this function returns 0 and the stop +# hook continues into its normal gate logic. +# +# Depends on FIELD_SESSION_ID and resolve_active_state_file from +# loop-common.sh. +# +# Usage: handle_bg_task_short_circuit "$LOOP_DIR" "$HOOK_INPUT" "$HOOK_SESSION_ID" +handle_bg_task_short_circuit() { + local loop_dir="$1" hook_input="$2" hook_session_id="$3" + + # Shared state used by the guard blocks below. + # Loop-start boundary: derived from the loop dir basename + # (`YYYY-MM-DD_HH-MM-SS`). Empty means derivation failed; helpers + # treat empty since_ts as no boundary. + local loop_start_ts transcript_path + loop_start_ts=$(derive_loop_start_iso_ts "$loop_dir") + transcript_path=$(extract_transcript_path "$hook_input") + + # ---------------------------------------- + # Ambiguous-Caller Marker Guard + # ---------------------------------------- + # If a bg-pending.marker is present but we have no session_id on + # this hook invocation (typical of scripts/rlcr-stop-gate.sh + # invoked without --session-id, or any other caller that doesn't + # forward session_id), we cannot tell whether this caller owns the + # parked loop. 
Taking either branch (foreign-session guard below, + # or same-session cleanup further down) would be wrong in one of + # the two possible realities. Exit 0 silently: the real Claude + # hook will arrive with session_id populated and drive parking / + # cleanup from an authoritative context. + if [[ -f "$loop_dir/bg-pending.marker" ]] && [[ -z "$hook_session_id" ]]; then + exit 0 + fi + + # ---------------------------------------- + # Cross-Session Parked-Loop Guard + # ---------------------------------------- + # If find_active_loop handed this dir over via the marker fallback, + # the loop is parked by a different session waiting on a background + # task. The current session has no authority to inspect or advance + # that loop - its transcript sees none of the foreign bg activity - + # so the only safe response is to exit 0 with a distinct + # systemMessage and leave every on-disk artifact (state file, + # stored session_id, marker) untouched. + # + # Both sides of the session-id comparison must be non-empty for + # this branch to trigger: an empty hook_session_id has already + # exited above via the ambiguous-caller guard, and an empty stored + # session_id keeps the backward-compat "matches any" semantics + # from find_active_loop. + if [[ -f "$loop_dir/bg-pending.marker" ]]; then + local guard_state_file guard_stored_sid + guard_state_file=$(resolve_active_state_file "$loop_dir") + if [[ -n "$guard_state_file" ]]; then + guard_stored_sid=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$guard_state_file" 2>/dev/null | tr -d ' ') + if [[ -n "$guard_stored_sid" ]] \ + && [[ -n "$hook_session_id" ]] \ + && [[ "$guard_stored_sid" != "$hook_session_id" ]]; then + jq -n \ + '{systemMessage: "RLCR loop in this repo is parked by another Claude session waiting for background work. Stop allowed; your session leaves the loop untouched. 
If that session ended, run /humanize:cancel-rlcr-loop to clean up."}' + exit 0 + fi + fi + fi + + # ---------------------------------------- + # Early Exit: Pending Background Tasks + # ---------------------------------------- + # When the main Claude Code session has dispatched background work + # (Agent with run_in_background=true, or Bash with + # run_in_background=true) whose completion notifications have not + # yet arrived, the natural "stop" is simply "I am waiting for the + # background task". Running git/summary/BitLesson/Codex gates in + # that state wastes Codex tokens and produces low-signal reviews. + # + # Allow the stop (exit 0) and emit a user-visible systemMessage so + # nobody mistakes the pause for loop completion. The on-disk loop + # state is left untouched -- the next natural stop (after + # background work finishes) will re-enter this hook with no + # pending tasks and run the normal flow. + # + # loop_start_ts confines the transcript scan to launches that + # actually happened during this loop; earlier session-wide bg + # activity cannot pin the loop. + # + # This check MUST run before any other gate (phase detection, + # state parsing, branch / plan / git-clean / summary / max-iter + # checks, Codex review). + local pending_bg_ids + pending_bg_ids=$(list_pending_background_task_ids "$transcript_path" "$loop_start_ts" 2>/dev/null) || true + if [[ -n "$pending_bg_ids" ]]; then + local pending_bg_count + pending_bg_count=$(printf '%s\n' "$pending_bg_ids" | sed '/^$/d' | wc -l | tr -d ' ') + # Mark the loop as parked; allows the same session to resume + # later and makes the cross-session guard above reachable if + # the user opens a different Claude session in this repo + # before the bg task completes. + : > "$loop_dir/bg-pending.marker" 2>/dev/null || true + jq -n --arg count "$pending_bg_count" \ + '{systemMessage: ("RLCR loop active. 
" + $count + " background task(s) still running - stop allowed naturally; loop has NOT terminated and will resume on completion.")}' + exit 0 + fi + + # ---------------------------------------- + # Same-Session Stale-Marker Cleanup + # ---------------------------------------- + # The cross-session guard above already exited for every foreign + # session, so reaching here with the marker present means the + # CURRENT session parked the loop and has now come back with a + # transcript showing no pending bg events. Remove the stale marker + # before the normal flow takes over. + # + # Two-part guard to make sure we never drop the parked-state + # signal without evidence: + # (a) list_pending_background_task_ids returned exit 0 -- the + # transcript was present, readable, AND parsed successfully. + # The helper is fail-closed on missing files, empty paths, + # jq parse failure, and truncation, so a non-zero exit + # blocks cleanup here even when the transcript "file" + # exists. + # (b) its output is empty -- proves "no pending" was + # authoritatively verified, not inferred from a failure. + # The check uses a single fresh call so we capture both the exit + # code and the emptiness without double-running jq. 
+ if [[ -f "$loop_dir/bg-pending.marker" ]]; then + local pending_bg_check + if pending_bg_check=$(list_pending_background_task_ids "$transcript_path" "$loop_start_ts" 2>/dev/null) \ + && [[ -z "$pending_bg_check" ]]; then + rm -f "$loop_dir/bg-pending.marker" 2>/dev/null || true + fi + fi +} diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 5bd2b9c3..5726b23b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Common functions for RLCR loop hooks # @@ -38,11 +38,23 @@ readonly FIELD_FULL_REVIEW_ROUND="full_review_round" readonly FIELD_ASK_CODEX_QUESTION="ask_codex_question" readonly FIELD_SESSION_ID="session_id" readonly FIELD_AGENT_TEAMS="agent_teams" +readonly FIELD_PRIVACY_MODE="privacy_mode" +readonly FIELD_MAINLINE_STALL_COUNT="mainline_stall_count" +readonly FIELD_LAST_MAINLINE_VERDICT="last_mainline_verdict" +readonly FIELD_DRIFT_STATUS="drift_status" + +readonly MAINLINE_VERDICT_ADVANCED="advanced" +readonly MAINLINE_VERDICT_STALLED="stalled" +readonly MAINLINE_VERDICT_REGRESSED="regressed" +readonly MAINLINE_VERDICT_UNKNOWN="unknown" + +readonly DRIFT_STATUS_NORMAL="normal" +readonly DRIFT_STATUS_REPLAN_REQUIRED="replan_required" # Default Codex configuration (single source of truth - all scripts reference this) # Scripts can pre-set DEFAULT_CODEX_MODEL/DEFAULT_CODEX_EFFORT before sourcing to override. # Config-backed defaults are loaded from the merge hierarchy after config-loader.sh is sourced. -# Precedence: pre-set value > config value > hardcoded fallback (gpt-5.4/high) +# Precedence: pre-set value > config value > hardcoded fallback (gpt-5.5/high) # # The actual assignment happens in the "Config-backed defaults" section below, # after config-loader.sh has been sourced and merged config is available. @@ -161,6 +173,10 @@ LOOP_COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" LOOP_COMMON_PLUGIN_ROOT="$(cd "$LOOP_COMMON_DIR/../.." 
&& pwd)" export PLUGIN_ROOT="${PLUGIN_ROOT:-$LOOP_COMMON_PLUGIN_ROOT}" +# Shared project-root resolver (CLAUDE_PROJECT_DIR -> git toplevel, +# realpath-canonicalized). Must load before any caller needs PROJECT_ROOT. +source "$LOOP_COMMON_DIR/project-root.sh" + _lc_errexit=false; [[ -o errexit ]] && _lc_errexit=true _lc_nounset=false; [[ -o nounset ]] && _lc_nounset=true _lc_pipefail=false; [[ -o pipefail ]] && _lc_pipefail=true @@ -170,19 +186,28 @@ $_lc_nounset && set -u || set +u $_lc_pipefail && set -o pipefail || set +o pipefail unset _lc_errexit _lc_nounset _lc_pipefail -_LOOP_COMMON_PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null || pwd)}" +_LOOP_COMMON_PROJECT_ROOT="$(resolve_project_root 2>/dev/null || true)" # Config loading is best-effort: use || true so a config-load failure does not # abort sourcing before callers' dependency checks (jq, codex) are reached. # Stderr is NOT suppressed so malformed config warnings remain visible. -_LOOP_COMMON_CONFIG="$(load_merged_config "$LOOP_COMMON_PLUGIN_ROOT" "$_LOOP_COMMON_PROJECT_ROOT")" || true +# +# Skip config loading when no project root is available (e.g. humanize.sh is +# sourced from .bashrc/.zshrc in a non-repo directory like $HOME). Passing an +# empty project_root to load_merged_config would surface a usage error on +# stderr every time the shell starts. +if [[ -n "$_LOOP_COMMON_PROJECT_ROOT" ]]; then + _LOOP_COMMON_CONFIG="$(load_merged_config "$LOOP_COMMON_PLUGIN_ROOT" "$_LOOP_COMMON_PROJECT_ROOT")" || true +else + _LOOP_COMMON_CONFIG="" +fi # Load bitlesson model from merged config (controls which CLI bitlesson-select.sh uses) DEFAULT_BITLESSON_MODEL="$(get_config_value "$_LOOP_COMMON_CONFIG" "bitlesson_model" 2>/dev/null || true)" DEFAULT_BITLESSON_MODEL="${DEFAULT_BITLESSON_MODEL:-haiku}" # Load codex model/effort from merged config so .humanize/config.json can set persistent -# defaults for all Codex-using features (RLCR, PR loop, ask-codex). 
-# Precedence: pre-set by caller (e.g. PR loop) > config value > hardcoded fallback (gpt-5.4/high) +# defaults for all Codex-using features (RLCR, ask-codex). +# Precedence: pre-set by caller > config value > hardcoded fallback (gpt-5.5/high) _cfg_codex_model="$(get_config_value "$_LOOP_COMMON_CONFIG" "codex_model" 2>/dev/null || true)" if [[ -n "$_cfg_codex_model" && ! "$_cfg_codex_model" =~ ^[a-zA-Z0-9._-]+$ ]]; then echo "Warning: Invalid codex_model in merged config: $_cfg_codex_model" >&2 @@ -194,7 +219,7 @@ elif [[ -n "$_cfg_codex_model" && ! "$_cfg_codex_model" =~ ^(gpt-|o[0-9]) ]]; th echo " Ignoring configured codex_model; using caller preset or fallback" >&2 _cfg_codex_model="" fi -DEFAULT_CODEX_MODEL="${DEFAULT_CODEX_MODEL:-${_cfg_codex_model:-gpt-5.4}}" +DEFAULT_CODEX_MODEL="${DEFAULT_CODEX_MODEL:-${_cfg_codex_model:-gpt-5.5}}" _cfg_codex_effort="$(get_config_value "$_LOOP_COMMON_CONFIG" "codex_effort" 2>/dev/null || true)" if [[ -n "$_cfg_codex_effort" && ! "$_cfg_codex_effort" =~ ^(xhigh|high|medium|low)$ ]]; then echo "Warning: Invalid codex_effort in merged config: $_cfg_codex_effort" >&2 @@ -230,6 +255,12 @@ extract_session_id() { printf '%s' "$input" | jq -r '.session_id // empty' 2>/dev/null || echo "" } +# Background-task helpers (expand_leading_tilde, extract_transcript_path, +# derive_loop_start_iso_ts, list/has/count_pending_background_task[_ids], +# handle_bg_task_short_circuit) live in loop-bg-tasks.sh and are sourced +# at the bottom of this file so every existing consumer of loop-common.sh +# continues to get them transparently. 
+ # Resolve the active state file for a loop directory # Checks for finalize-state.md first, then state.md # Usage: resolve_active_state_file "$loop_dir" @@ -237,7 +268,9 @@ extract_session_id() { resolve_active_state_file() { local loop_dir="$1" - if [[ -f "$loop_dir/finalize-state.md" ]]; then + if [[ -f "$loop_dir/methodology-analysis-state.md" ]]; then + echo "$loop_dir/methodology-analysis-state.md" + elif [[ -f "$loop_dir/finalize-state.md" ]]; then echo "$loop_dir/finalize-state.md" elif [[ -f "$loop_dir/state.md" ]]; then echo "$loop_dir/state.md" @@ -255,7 +288,10 @@ resolve_any_state_file() { local loop_dir="$1" # Prefer active states - if [[ -f "$loop_dir/finalize-state.md" ]]; then + if [[ -f "$loop_dir/methodology-analysis-state.md" ]]; then + echo "$loop_dir/methodology-analysis-state.md" + return + elif [[ -f "$loop_dir/finalize-state.md" ]]; then echo "$loop_dir/finalize-state.md" return elif [[ -f "$loop_dir/state.md" ]]; then @@ -286,10 +322,17 @@ resolve_any_state_file() { # Empty stored session_id matches any filter (backward compat for pre-session # state files). # +# Third parameter `allow_bg_marker_fallback` (default "false"): when "true", +# the session-filter branch also considers a mismatched-session dir that holds +# a `bg-pending.marker` file AND an active state file. Only the RLCR stop +# hook opts in to this; every other caller (read/write/bash/plan-file +# validators, ...) keeps strict session isolation. +# # Outputs the directory path to stdout, or empty string if none found find_active_loop() { local loop_base_dir="$1" local filter_session_id="${2:-}" + local allow_bg_marker_fallback="${3:-false}" if [[ ! -d "$loop_base_dir" ]]; then echo "" @@ -313,9 +356,18 @@ find_active_loop() { return fi - # Session filter: iterate newest-to-oldest, find the first dir belonging - # to this session (any state file), then check if it is still active. + # Session filter: iterate newest-to-oldest. 
+ # + # The caller's own (exact stored session_id) match takes precedence over + # any marker-based adoption: with multiple active RLCR loops in the same + # repo, a newer dir parked by a different session must not be returned + # before an older dir that actually belongs to the caller. Marker + # candidates are recorded during the scan and only used as a fallback + # when no exact match is found anywhere. Zombie-loop protection + # (terminal newest for this session returns empty) still wins over + # marker fallback. local dir + local marker_candidate="" while IFS= read -r dir; do [[ -z "$dir" ]] && continue local trimmed_dir="${dir%/}" @@ -329,9 +381,9 @@ find_active_loop() { local stored_session_id stored_session_id=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/'"${FIELD_SESSION_ID}"': *//; p; } }' "$any_state" 2>/dev/null | tr -d ' ') - # Empty stored session_id matches any session (backward compat) + # Empty stored session_id matches any session (backward compat). if [[ -z "$stored_session_id" ]] || [[ "$stored_session_id" == "$filter_session_id" ]]; then - # This is the newest dir for this session -- only return if active + # Newest dir for this session -- only return if active. local active_state active_state=$(resolve_active_state_file "$trimmed_dir") if [[ -n "$active_state" ]]; then @@ -339,11 +391,36 @@ find_active_loop() { return fi # Session's newest loop is in terminal state; do not fall through + # to marker-based adoption either. echo "" return fi + + # Session mismatch. Only the stop hook opts in to marker-based + # adoption; validators and other callers keep strict isolation, so + # the candidate is only recorded when the caller explicitly allows + # it. 
+ if [[ "$allow_bg_marker_fallback" == "true" ]] \ + && [[ -z "$marker_candidate" ]] \ + && [[ -f "$trimmed_dir/bg-pending.marker" ]]; then + local candidate_state + candidate_state=$(resolve_active_state_file "$trimmed_dir") + if [[ -n "$candidate_state" ]]; then + marker_candidate="$trimmed_dir" + fi + # Marker on a terminal loop is stale; leave it alone. + fi done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) + # No exact session match. Fall back to marker-based adoption only when + # the caller explicitly opted in -- the stop hook uses this to surface + # a "parked by another session" notice or to resume its own parked + # loop after a previous session died before the bg completion arrived. + if [[ "$allow_bg_marker_fallback" == "true" ]] && [[ -n "$marker_candidate" ]]; then + echo "$marker_candidate" + return + fi + echo "" } @@ -385,6 +462,10 @@ _parse_state_fields() { STATE_ASK_CODEX_QUESTION=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_ASK_CODEX_QUESTION}:" | sed "s/${FIELD_ASK_CODEX_QUESTION}: *//" | tr -d ' ' || true) STATE_SESSION_ID=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_SESSION_ID}:" | sed "s/${FIELD_SESSION_ID}: *//" || true) STATE_AGENT_TEAMS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_AGENT_TEAMS}:" | sed "s/${FIELD_AGENT_TEAMS}: *//" | tr -d ' ' || true) + STATE_PRIVACY_MODE=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_PRIVACY_MODE}:" | sed "s/${FIELD_PRIVACY_MODE}: *//" | tr -d ' ' || true) + STATE_MAINLINE_STALL_COUNT=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_MAINLINE_STALL_COUNT}:" | sed "s/${FIELD_MAINLINE_STALL_COUNT}: *//" | tr -d ' ' || true) + STATE_LAST_MAINLINE_VERDICT=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_LAST_MAINLINE_VERDICT}:" | sed "s/${FIELD_LAST_MAINLINE_VERDICT}: *//" | tr -d ' ' || true) + STATE_DRIFT_STATUS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_DRIFT_STATUS}:" | sed "s/${FIELD_DRIFT_STATUS}: *//" | tr -d ' ' || true) } # Parse state file frontmatter and set variables (tolerant mode with defaults) 
@@ -405,6 +486,9 @@ _parse_state_fields() { # STATE_FULL_REVIEW_ROUND - interval for Full Alignment Check (default: 5) # STATE_ASK_CODEX_QUESTION - "true" or "false" (v1.6.5+) # STATE_AGENT_TEAMS - "true" or "false" +# STATE_MAINLINE_STALL_COUNT - consecutive stalled/regressed implementation rounds +# STATE_LAST_MAINLINE_VERDICT - advanced/stalled/regressed/unknown +# STATE_DRIFT_STATUS - normal/replan_required # Returns: 0 on success, 1 if file not found # Note: For strict validation, use parse_state_file_strict() instead parse_state_file() { @@ -427,6 +511,11 @@ parse_state_file() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + # Default privacy_mode to "true" for legacy loops that pre-date this field + STATE_PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" + STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" # STATE_REVIEW_STARTED left as-is (empty if missing, to allow schema validation) return 0 @@ -502,10 +591,131 @@ parse_state_file_strict() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" + STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" return 0 } +# Normalize mainline progress verdict to a safe enum. 
+# Usage: normalize_mainline_progress_verdict "ADVANCED" +normalize_mainline_progress_verdict() { + local verdict_lower + verdict_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + + case "$verdict_lower" in + "$MAINLINE_VERDICT_ADVANCED"|"$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED") + echo "$verdict_lower" + ;; + *) + echo "$MAINLINE_VERDICT_UNKNOWN" + ;; + esac +} + +# Normalize drift status to a safe enum. +# Usage: normalize_drift_status "replan_required" +normalize_drift_status() { + local status_lower + status_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + + case "$status_lower" in + "$DRIFT_STATUS_REPLAN_REQUIRED") + echo "$DRIFT_STATUS_REPLAN_REQUIRED" + ;; + *) + echo "$DRIFT_STATUS_NORMAL" + ;; + esac +} + +# Extract "Mainline Progress Verdict" from Codex review content. +# Outputs one of: advanced, stalled, regressed, unknown +# Usage: extract_mainline_progress_verdict "$review_content" +extract_mainline_progress_verdict() { + local review_content="$1" + local verdict_line + local verdict_value + + verdict_line=$(printf '%s\n' "$review_content" | grep -Ei 'Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED)([^A-Za-z]|$)' | tail -1 || true) + if [[ -z "$verdict_line" ]]; then + echo "$MAINLINE_VERDICT_UNKNOWN" + return + fi + + # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). + # The preceding grep -Ei already ensures the line contains one of the three verdicts. + # Reject lines with multiple verdict keywords (e.g. placeholder template formats) + # to avoid silently accepting an ambiguous verdict. 
+ local _verdict_matches + _verdict_matches=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED') + local _match_count + _match_count=$(printf '%s\n' "$_verdict_matches" | wc -l) + if [[ "$_match_count" -gt 1 ]]; then + echo "$MAINLINE_VERDICT_UNKNOWN" + return + fi + verdict_value=$(printf '%s\n' "$_verdict_matches" | head -1) + normalize_mainline_progress_verdict "$verdict_value" +} + +# Upsert simple YAML frontmatter fields in a state file. +# Values must not contain whitespace (assignments are split on blanks); only lines inside the frontmatter are replaced. +# Usage: upsert_state_fields "/path/to/state.md" "field=value" "other=value" +upsert_state_fields() { + local state_file="$1" + shift + + local temp_file="${state_file}.tmp.$$" + + awk -v assignments="$*" ' + BEGIN { + count = split(assignments, pairs, " "); + for (i = 1; i <= count; i++) { + eq = index(pairs[i], "="); + key = substr(pairs[i], 1, eq - 1); + val = substr(pairs[i], eq + 1); + keys[key] = val; + order[i] = key; + } + separator_count = 0; + } + { + if ($0 == "---") { + separator_count++; + if (separator_count == 2) { + for (i = 1; i <= count; i++) { + key = order[i]; + if (!(key in seen)) { + print key ": " keys[key]; + seen[key] = 1; + } + } + } + print; + next; + } + + handled = 0; + for (i = 1; i <= count; i++) { + key = order[i]; + if (separator_count == 1 && $0 ~ ("^" key ":")) { + print key ": " keys[key]; + seen[key] = 1; + handled = 1; + break; + } + } + + if (!handled) { + print; + } + } + ' "$state_file" > "$temp_file" && mv "$temp_file" "$state_file" +} + # Detect review issues from codex review log file # Returns: # 0 - issues found (caller should continue review loop) @@ -583,7 +793,7 @@ to_lower() { } # Check if a path (lowercase) matches a round file pattern -# Usage: is_round_file "$lowercase_path" "summary|prompt|todos" +# Usage: is_round_file_type "$lowercase_path" "summary|prompt|todos|contract" is_round_file_type() { local path_lower="$1" local file_type="$2" @@ -600,7 +810,7 @@ extract_round_number() { filename_lower=$(to_lower "$filename") # Use sed for portable
regex extraction (works in both bash and zsh) - echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\)\.md$/\1/p' + echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\|contract\)\.md$/\1/p' } # Check if a file is in the allowlist for the active loop @@ -664,6 +874,21 @@ You cannot modify finalize-state.md. This file is managed by the loop system dur load_and_render_safe "$TEMPLATE_DIR" "block/finalize-state-file-modification.md" "$fallback" } +# Standard message for blocking round contract access during Finalize Phase +# Usage: finalize_contract_blocked_message "read" +finalize_contract_blocked_message() { + local action="$1" + local fallback="# Finalize Contract Access Blocked + +There is no active round contract during the Finalize Phase. + +Do not {{ACTION}} historical round contract files. +Use finalize-summary.md for finalize-only notes and goal-tracker.md for current state." + + load_and_render_safe "$TEMPLATE_DIR" "block/finalize-contract-access.md" "$fallback" \ + "ACTION=$action" +} + # Standard message for blocking summary file modifications via Bash # Usage: summary_bash_blocked_message "$correct_summary_path" summary_bash_blocked_message() { @@ -692,6 +917,80 @@ is_goal_tracker_path() { echo "$path_lower" | grep -qE 'goal-tracker\.md$' } +# Extract the immutable section from a goal-tracker content stream. +# Supports both current trackers (with --- separator) and older trackers +# that jump directly from IMMUTABLE SECTION to MUTABLE SECTION. +extract_goal_tracker_immutable_from_stream() { + awk ' + /^## IMMUTABLE SECTION[[:space:]]*$/ { capture=1 } + capture && /^## MUTABLE SECTION[[:space:]]*$/ { exit } + capture && /^---[[:space:]]*$/ { exit } + capture { print } + ' +} + +# Extract the immutable section from an on-disk goal-tracker file. 
+# Usage: extract_goal_tracker_immutable_from_file "/path/to/goal-tracker.md" +extract_goal_tracker_immutable_from_file() { + local tracker_file="$1" + if [[ ! -f "$tracker_file" ]]; then + return 1 + fi + extract_goal_tracker_immutable_from_stream < "$tracker_file" +} + +# Extract the immutable section from an in-memory goal-tracker string. +# Usage: extract_goal_tracker_immutable_from_text "$content" +extract_goal_tracker_immutable_from_text() { + local tracker_content="$1" + printf '%s' "$tracker_content" | extract_goal_tracker_immutable_from_stream +} + +# Check whether a proposed goal-tracker update preserves the immutable section. +# Usage: goal_tracker_mutable_update_allowed "/path/to/current.md" "$new_content" +goal_tracker_mutable_update_allowed() { + local tracker_file="$1" + local updated_content="$2" + + local current_immutable="" + local updated_immutable="" + current_immutable=$(extract_goal_tracker_immutable_from_file "$tracker_file" 2>/dev/null || true) + updated_immutable=$(extract_goal_tracker_immutable_from_text "$updated_content" 2>/dev/null || true) + + # Legacy trackers without IMMUTABLE SECTION: allow edits unconditionally. + [[ -n "$current_immutable" ]] || return 0 + [[ "$current_immutable" == "$updated_immutable" ]] +} + +# Render the post-edit contents for a literal Edit operation. +# Returns non-zero if the edit preview cannot be produced. 
+# Usage: preview_edit_result "/path/to/file" "$old_string" "$new_string" "true|false" +preview_edit_result() { + local file_path="$1" + local old_string="$2" + local new_string="$3" + local replace_all="${4:-false}" + + command -v perl >/dev/null 2>&1 || return 1 + + FILE_PATH="$file_path" \ + OLD_STRING="$old_string" \ + NEW_STRING="$new_string" \ + REPLACE_ALL="$replace_all" \ + perl -0pe ' + BEGIN { + $old = $ENV{"OLD_STRING"}; + $new = $ENV{"NEW_STRING"}; + $replace_all = $ENV{"REPLACE_ALL"} eq "true"; + } + if ($replace_all) { + s/\Q$old\E/$new/g; + } else { + s/\Q$old\E/$new/; + } + ' "$file_path" +} + # Check if a path (lowercase) targets state.md is_state_file_path() { local path_lower="$1" @@ -704,6 +1003,21 @@ is_finalize_state_file_path() { echo "$path_lower" | grep -qE 'finalize-state\.md$' } +# Check if a path (lowercase) targets methodology-analysis-state.md +is_methodology_analysis_state_file_path() { + local path_lower="$1" + echo "$path_lower" | grep -qE 'methodology-analysis-state\.md$' +} + +# Standard message for blocking methodology-analysis-state file modifications +methodology_analysis_state_file_blocked_message() { + local fallback="# Methodology Analysis State File Modification Blocked + +You cannot modify methodology-analysis-state.md. This file is managed by the loop system during the Methodology Analysis Phase." + + load_and_render_safe "$TEMPLATE_DIR" "block/methodology-analysis-state-file-modification.md" "$fallback" +} + # Check if a path (lowercase) targets finalize-summary.md is_finalize_summary_path() { local path_lower="$1" @@ -765,10 +1079,20 @@ is_cancel_authorized() { return 4 fi + # Canonicalize the loop dir (idempotent: resolve_project_root already + # canonicalizes, but callers may supply a non-canonical override). Both + # sides of the upcoming string comparisons must be canonicalized through + # the same transformation or a symlinked prefix in the user's command + # (e.g. /var/... vs /private/var/... 
on macOS) will spuriously fail the + # authorization check. + local canonical_loop_dir + canonical_loop_dir="$(canonicalize_path "${active_loop_dir%/}")" + canonical_loop_dir="${canonical_loop_dir:-${active_loop_dir%/}}" + # Normalize: Replace $loop_dir and ${loop_dir} with actual path local normalized="$command_lower" local loop_dir_lower - loop_dir_lower="${active_loop_dir%/}/" + loop_dir_lower="${canonical_loop_dir}/" loop_dir_lower=$(echo "$loop_dir_lower" | tr '[:upper:]' '[:lower:]') normalized="${normalized//\$\{loop_dir\}/$loop_dir_lower}" @@ -864,29 +1188,59 @@ is_cancel_authorized() { return 5 fi - # Normalize and validate source path + # Normalize and validate source path. + # + # Use canonicalize_path_prefix (NOT canonicalize_path): we need to resolve + # symlinks in the parent directory so a symlinked project prefix matches + # canonical_loop_dir, but we MUST NOT dereference a symlink at the leaf. + # Otherwise a symlink like /tmp/alias -> <loop>/state.md would canonicalize + # to <loop>/state.md and pass the check, but `mv` would then operate on + # the link path itself, escaping the loop directory and/or corrupting + # loop state. The on-disk symlink rejection below (src_original check) + # still fires because it probes the real state.md under canonical_loop_dir. + # + # Re-lowercase after canonicalization because realpath on case-insensitive + # filesystems may restore the original casing of path components, which + # would diverge from the already-lowercased expected_* values. 
src=$(_normalize_path "$src") + local src_canonical + src_canonical="$(canonicalize_path_prefix "$src")" + src_canonical="${src_canonical:-$src}" + src_canonical=$(echo "$src_canonical" | tr '[:upper:]' '[:lower:]') local expected_src_state="${loop_dir_lower}state.md" local expected_src_finalize="${loop_dir_lower}finalize-state.md" - if [[ "$src" != "$expected_src_state" ]] && [[ "$src" != "$expected_src_finalize" ]]; then + local expected_src_methodology="${loop_dir_lower}methodology-analysis-state.md" + if [[ "$src_canonical" != "$expected_src_state" ]] && [[ "$src_canonical" != "$expected_src_finalize" ]] && [[ "$src_canonical" != "$expected_src_methodology" ]]; then return 5 fi - # Normalize and validate destination path + # Normalize and validate destination path. Uses canonicalize_path_prefix + # for the same reason as src: a symlink alias pointing at the real + # cancel-state.md must NOT pass authorization, because `mv` onto a + # symlink replaces the link rather than creating <loop>/cancel-state.md, + # corrupting loop state and moving state.md outside the loop dir. dest=$(_normalize_path "$dest") + local dest_canonical + dest_canonical="$(canonicalize_path_prefix "$dest")" + dest_canonical="${dest_canonical:-$dest}" + dest_canonical=$(echo "$dest_canonical" | tr '[:upper:]' '[:lower:]') local expected_dest="${loop_dir_lower}cancel-state.md" - if [[ "$dest" != "$expected_dest" ]]; then + if [[ "$dest_canonical" != "$expected_dest" ]]; then return 5 fi # SECURITY: Reject if source file is a symlink (filesystem check) # Determine source file by comparing against expected paths (not substring match) - # This avoids vulnerability when loop directory path contains "finalize" + # This avoids vulnerability when loop directory path contains "finalize" or "methodology" + # Use canonical_loop_dir so the symlink check runs against the real on-disk + # path rather than a user-supplied non-canonical form. 
local src_original - if [[ "$src" == "$expected_src_finalize" ]]; then - src_original="${active_loop_dir}/finalize-state.md" + if [[ "$src_canonical" == "$expected_src_methodology" ]]; then + src_original="${canonical_loop_dir}/methodology-analysis-state.md" + elif [[ "$src_canonical" == "$expected_src_finalize" ]]; then + src_original="${canonical_loop_dir}/finalize-state.md" else - src_original="${active_loop_dir}/state.md" + src_original="${canonical_loop_dir}/state.md" fi if [[ -L "$src_original" ]]; then return 6 # Source is a symlink @@ -901,196 +1255,6 @@ is_in_humanize_loop_dir() { echo "$path" | grep -q '\.humanize/rlcr/' } -# ======================================== -# PR Loop Bot Name Mapping -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -# -# Usage: author=$(map_bot_to_author "codex") -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# Reverse mapping: author name to bot name -# - chatgpt-codex-connector[bot] -> codex -# - chatgpt-codex-connector -> codex -# - claude[bot] -> claude -# -# Usage: bot=$(map_author_to_bot "chatgpt-codex-connector[bot]") -map_author_to_bot() { - local author="$1" - # Remove [bot] suffix if present - local author_clean="${author%\[bot\]}" - case "$author_clean" in - chatgpt-codex-connector) echo "codex" ;; - *) echo "$author_clean" ;; - esac -} - -# Build a YAML list string from an array of values -# Returns multiline string with " - value" for each item -# -# Usage: yaml_list=$(build_yaml_list "${array[@]}") -build_yaml_list() { - local result="" - for item in "$@"; do - result="${result} - - ${item}" - done - echo "$result" -} - -# Build a mention string from bot names (e.g., "@claude @codex") -# -# Usage: mentions=$(build_bot_mention_string "${bots[@]}") -build_bot_mention_string() { - local result="" - for bot 
in "$@"; do - if [[ -n "$result" ]]; then - result="${result} @${bot}" - else - result="@${bot}" - fi - done - echo "$result" -} - -# ======================================== -# PR Loop Directory Functions -# ======================================== - -# Check if a path is inside .humanize/pr-loop directory -is_in_pr_loop_dir() { - local path="$1" - echo "$path" | grep -q '\.humanize/pr-loop/' -} - -# Check if a path is inside any loop directory (RLCR or PR loop) -is_in_any_loop_dir() { - local path="$1" - is_in_humanize_loop_dir "$path" || is_in_pr_loop_dir "$path" -} - -# Find the most recent active PR loop directory with state.md -# Similar to find_active_loop but for PR loops -# Outputs the directory path to stdout, or empty string if none found -find_active_pr_loop() { - local loop_base_dir="$1" - - if [[ ! -d "$loop_base_dir" ]]; then - echo "" - return - fi - - local newest_dir - newest_dir=$(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r | head -1) - - if [[ -n "$newest_dir" && -f "${newest_dir}state.md" ]]; then - echo "${newest_dir%/}" - else - echo "" - fi -} - -# Check if a path (lowercase) matches a PR loop round file pattern -# Types: pr-comment, pr-resolve, pr-check, pr-feedback, prompt, codex-prompt -is_pr_round_file_type() { - local path_lower="$1" - local file_type="$2" - - echo "$path_lower" | grep -qE "round-[0-9]+-${file_type}\\.md\$" -} - -# Check if a path matches any PR loop read-only file type -# These files are generated by the system and should not be modified by Claude -is_pr_loop_readonly_file() { - local path_lower="$1" - - is_pr_round_file_type "$path_lower" "pr-comment" || \ - is_pr_round_file_type "$path_lower" "prompt" || \ - is_pr_round_file_type "$path_lower" "codex-prompt" || \ - is_pr_round_file_type "$path_lower" "pr-check" || \ - is_pr_round_file_type "$path_lower" "pr-feedback" -} - -# Validate PR loop pr-resolve file round number -# Returns 0 if valid (correct round or no active loop), exits with error message if wrong 
round -# Usage: validate_pr_resolve_round "$file_path_lower" "$action_verb" -# Arguments: -# $1 - File path (lowercase) -# $2 - Action verb for error message ("edit" or "write to") -validate_pr_resolve_round() { - local file_path_lower="$1" - local action_verb="$2" - - local project_root="${CLAUDE_PROJECT_DIR:-$(pwd)}" - local pr_loop_base_dir="$project_root/.humanize/pr-loop" - local active_pr_loop_dir - active_pr_loop_dir=$(find_active_pr_loop "$pr_loop_base_dir") - - if [[ -z "$active_pr_loop_dir" ]]; then - return 0 - fi - - local pr_state_file="$active_pr_loop_dir/state.md" - if [[ ! -f "$pr_state_file" ]]; then - return 0 - fi - - local pr_current_round - pr_current_round=$(sed -n '/^---$/,/^---$/{ /^current_round:/{ s/current_round: *//; p; } }' "$pr_state_file" | tr -d ' ') - pr_current_round="${pr_current_round:-0}" - - local claude_pr_round - claude_pr_round=$(echo "$file_path_lower" | sed -n 's|.*round-\([0-9]*\)-pr-resolve\.md$|\1|p') - - if [[ -n "$claude_pr_round" ]] && [[ "$claude_pr_round" != "$pr_current_round" ]]; then - local correct_path="$active_pr_loop_dir/round-${pr_current_round}-pr-resolve.md" - # NOTE: Avoid ${var^} (Bash 4+ only) for macOS Bash 3.2 compatibility - # Use tr for portable capitalization of first letter - local action_verb_cap - action_verb_cap=$(echo "$action_verb" | sed 's/^\(.\)/\U\1/') - # Fallback for systems where \U doesn't work (use awk instead) - if [[ "$action_verb_cap" == "$action_verb" ]] || [[ "$action_verb_cap" == *'U'* ]]; then - action_verb_cap=$(echo "$action_verb" | awk '{print toupper(substr($0,1,1)) tolower(substr($0,2))}') - fi - echo "# Wrong Round Number" >&2 - echo "" >&2 - echo "You tried to $action_verb round-${claude_pr_round}-pr-resolve.md but current PR loop round is **${pr_current_round}**." 
>&2 - echo "" >&2 - echo "$action_verb_cap: \`$correct_path\`" >&2 - return 2 - fi - - return 0 -} - -# Standard message for blocking PR loop state file modifications -pr_loop_state_blocked_message() { - local fallback="# PR Loop State File Modification Blocked - -You cannot modify state.md in .humanize/pr-loop/. This file is managed by the PR loop system." - - load_and_render_safe "$TEMPLATE_DIR" "block/pr-loop-state-modification.md" "$fallback" -} - -# Standard message for blocking PR loop prompt/comment file writes -pr_loop_prompt_blocked_message() { - local fallback="# PR Loop File Write Blocked - -You cannot write to round-*-pr-comment.md or round-*-prompt.md files in .humanize/pr-loop/. -These files are generated by the PR loop system and are read-only." - - load_and_render_safe "$TEMPLATE_DIR" "block/pr-loop-prompt-write.md" "$fallback" -} - # Check if a git add command would add .humanize files to version control # Usage: git_adds_humanize "$command_lower" # Returns 0 if the command would add .humanize files, 1 otherwise @@ -1153,7 +1317,7 @@ git_adds_humanize() { # Check for direct .humanize reference (blocked regardless of other flags) # Handles: .humanize, ./.humanize, path/to/.humanize, ".humanize", '.humanize' # Pattern matches .humanize at start, after space, after / or ./ AND followed by end, /, or space - # This avoids over-blocking .humanizeconfig or .humanize-backup + # This avoids over-blocking .humanizeconfig or .humanize-backup. if echo "$add_args_normalized" | grep -qE '(^|[[:space:]]|/)\.humanize($|/|[[:space:]])'; then return 0 fi @@ -1251,6 +1415,56 @@ IMPORTANT: The commit message must NOT contain the literal string \".humanize\" load_and_render_safe "$TEMPLATE_DIR" "block/git-add-humanize.md" "$fallback" } +# Return success if local Humanize runtime state has entered git tracking or the index. +# Untracked .humanize state is allowed; tracked or staged state must be blocked. 
+# Usage: git_has_tracked_humanize_state [project_root] +# +# Intentionally scoped to .humanize/ to stay consistent with git_adds_humanize, +# which explicitly allows unrelated paths like .humanize-backup or +# .humanizeconfig (see tests/test-humanize-escape.sh). ls-files covers both +# committed entries and paths staged via git add; paths the user has staged for +# removal via git rm --cached are correctly omitted so the user can unstick +# themselves without being re-blocked. +git_has_tracked_humanize_state() { + local project_root="${1:-.}" + + if [[ ! -d "$project_root/.git" ]] && ! git -C "$project_root" rev-parse --git-dir >/dev/null 2>&1; then + return 1 + fi + + if git -C "$project_root" ls-files -- .humanize 2>/dev/null | grep -q '.'; then + return 0 + fi + + return 1 +} + +# Standard message for blocking tracked/staged .humanize state. +# Usage: git_tracked_humanize_blocked_message +git_tracked_humanize_blocked_message() { + local fallback="# Tracked Humanize State Blocked + +Detected tracked or staged files under \`.humanize/\`. + +These files are local Humanize loop state and must remain outside version control. + +## Required Fix + +1. Remove Humanize state from the index: + + git rm --cached -r .humanize + +2. Keep only real project files staged. +3. Retry the stop action after the local state is no longer tracked. + +## Important + +- Do NOT use \`git add -f\` on Humanize state files. +- Do NOT commit RLCR trackers, round summaries, contracts, or cancel/finalize markers." 
+ + load_and_render_safe "$TEMPLATE_DIR" "block/git-tracked-humanize.md" "$fallback" +} + # Standard message for blocking direct execution of hook scripts # Usage: stop_hook_direct_execution_blocked_message stop_hook_direct_execution_blocked_message() { @@ -1296,17 +1510,24 @@ command_modifies_file() { } # Standard message for blocking goal-tracker modifications after Round 0 -# Usage: goal_tracker_blocked_message "$current_round" "$summary_file_path" +# Usage: goal_tracker_blocked_message "$current_round" "$correct_goal_tracker_path" goal_tracker_blocked_message() { local current_round="$1" - local summary_file="$2" - local fallback="# Goal Tracker Modification Blocked (Round {{CURRENT_ROUND}}) + local correct_path="$2" + local fallback="# Goal Tracker Update Blocked (Round {{CURRENT_ROUND}}) + +After Round 0, you may update only the **MUTABLE SECTION** of the active goal tracker. -After Round 0, only Codex can modify the Goal Tracker. Include a Goal Tracker Update Request in your summary: {{SUMMARY_FILE}}" +Use Write or Edit on: {{CORRECT_PATH}} + +Rules: +- Keep the **IMMUTABLE SECTION** unchanged +- Do not modify \`goal-tracker.md\` via Bash +- Do not write to an old loop session's tracker" load_and_render_safe "$TEMPLATE_DIR" "block/goal-tracker-modification.md" "$fallback" \ "CURRENT_ROUND=$current_round" \ - "SUMMARY_FILE=$summary_file" + "CORRECT_PATH=$correct_path" } # End the loop by renaming state.md to indicate exit reason @@ -1344,172 +1565,14 @@ end_loop() { fi } -# ======================================== -# PR Loop Goal Tracker Functions -# ======================================== - -# Update the PR goal tracker after Codex analysis -# Usage: update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$ROUND" "$BOT_RESULTS_JSON" -# -# Arguments: -# $1 - Path to goal-tracker.md -# $2 - Current round number -# $3 - JSON containing per-bot analysis results (optional) -# Format: {"bot": "name", "issues": N, "resolved": N} -# -# Updates: -# - Issue Summary table with new
row -# - Total Statistics section -# - Issue Log with round entry +# Source background-task helpers. Sourced at the bottom so every function +# above is available to callers that only need loop-common.sh, while bg-aware +# callers (the stop hook, the test suite) still get the bg helpers via a +# single source of loop-common.sh. # -# Note: This is a helper function for the stop hook. The primary update -# mechanism is through Codex prompt instructions, but this ensures -# consistency when Codex doesn't update correctly. -update_pr_goal_tracker() { - local tracker_file="$1" - local round="$2" - local bot_results="${3:-}" - - if [[ ! -f "$tracker_file" ]]; then - echo "Warning: Goal tracker not found: $tracker_file" >&2 - return 1 - fi - - # Extract reviewer early for idempotency check (need to check round+reviewer combo) - local reviewer="Codex" - if [[ -n "$bot_results" && "$bot_results" != "null" ]]; then - reviewer=$(echo "$bot_results" | jq -r '.bot // "Codex"' 2>/dev/null || echo "Codex") - fi - - # IDEMPOTENCY CHECK: Check for BOTH round AND reviewer to support multi-bot rounds - # This allows multiple bots to add their own rows for the same round - local has_summary_row=false - local has_log_entry=false - - # Check if this specific round+reviewer combo already exists in Issue Summary - # Table format: | Round | Reviewer | Issues Found | Issues Resolved | Status | - if grep -qE "^\|[[:space:]]*${round}[[:space:]]*\|[[:space:]]*${reviewer}[[:space:]]*\|" "$tracker_file" 2>/dev/null; then - has_summary_row=true - fi - - # Check if this specific round+reviewer combo already exists in Issue Log - # Log format: "### Round N" followed by "Reviewer: ..." 
- if awk -v round="$round" -v reviewer="$reviewer" ' - /^### Round / { current_round = $3 } - current_round == round && $1 == reviewer":" { found = 1; exit } - END { exit !found } - ' "$tracker_file" 2>/dev/null; then - has_log_entry=true - fi - - if [[ "$has_summary_row" == "true" && "$has_log_entry" == "true" ]]; then - echo "Goal tracker: Round $round/$reviewer already has both Issue Summary and Issue Log entries, skipping update" >&2 - return 0 - fi - - # Track what we need to add (for partial updates) - local need_summary_row=true - local need_log_entry=true - [[ "$has_summary_row" == "true" ]] && need_summary_row=false - [[ "$has_log_entry" == "true" ]] && need_log_entry=false - - if [[ "$has_summary_row" == "true" || "$has_log_entry" == "true" ]]; then - echo "Goal tracker: Round $round/$reviewer has partial update (summary=$has_summary_row, log=$has_log_entry), completing..." >&2 - fi - - # Extract current totals - local current_found - current_found=$(grep -E "^- Total Issues Found:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - current_found=${current_found:-0} - - local current_resolved - current_resolved=$(grep -E "^- Total Issues Resolved:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - current_resolved=${current_resolved:-0} - - # Parse bot results if provided (reviewer already extracted above for idempotency check) - local new_issues=0 - local new_resolved=0 - - if [[ -n "$bot_results" && "$bot_results" != "null" ]]; then - new_issues=$(echo "$bot_results" | jq -r '.issues // 0' 2>/dev/null || echo "0") - new_resolved=$(echo "$bot_results" | jq -r '.resolved // 0' 2>/dev/null || echo "0") - fi - - # Calculate new totals - local total_found=$((current_found + new_issues)) - local total_resolved=$((current_resolved + new_resolved)) - local remaining=$((total_found - total_resolved)) - - # Determine status for this round - local status="In Progress" - if [[ $new_issues -eq 0 && $new_resolved -eq 0 ]]; then - status="Approved" - elif [[ $new_issues 
-gt 0 ]]; then - status="Issues Found" - elif [[ $new_resolved -gt 0 ]]; then - status="Resolved" - fi - - # Create temp file for updates - local temp_file="${tracker_file}.update.$$" - - # Step 1: Update Total Statistics (only if we're adding to totals) - # Only update totals if we're adding a new summary row (to avoid double-counting) - if [[ "$need_summary_row" == "true" ]]; then - sed -e "s/^- Total Issues Found:.*/- Total Issues Found: $total_found/" \ - -e "s/^- Total Issues Resolved:.*/- Total Issues Resolved: $total_resolved/" \ - -e "s/^- Remaining:.*/- Remaining: $remaining/" \ - "$tracker_file" > "$temp_file" - else - cp "$tracker_file" "$temp_file" - fi - - # Step 2: Add row to Issue Summary table (only if needed) - if [[ "$need_summary_row" == "true" ]]; then - # Insert row INSIDE the table (after last table row, before blank line) - local new_row="| $round | $reviewer | $new_issues | $new_resolved | $status |" - - # Use awk to find the last row of the Issue Summary table and insert after it - awk -v row="$new_row" ' - BEGIN { in_table = 0; last_row_printed = 0 } - /^## Issue Summary/ { in_table = 1 } - /^## Total Statistics/ { in_table = 0 } - { - # If we hit Total Statistics and havent printed the new row yet, print it first - if (/^## Total Statistics/ && !last_row_printed) { - print row - print "" - last_row_printed = 1 - } - # If in table and this is a table row (starts with |), store it - if (in_table && /^\|/) { - last_table_line = NR - } - # If in table and this is a blank line after table rows, insert new row - if (in_table && /^[[:space:]]*$/ && last_table_line > 0 && !last_row_printed) { - print row - last_row_printed = 1 - } - print - } - ' "$temp_file" > "${temp_file}.2" - mv "${temp_file}.2" "$temp_file" - fi - - # Step 3: Add Issue Log entry for this round (only if needed) - if [[ "$need_log_entry" == "true" ]]; then - local timestamp - timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - local log_entry="### Round $round -$reviewer: Found 
$new_issues issues, Resolved $new_resolved -Updated: $timestamp -" - # Append to Issue Log section - echo "" >> "$temp_file" - echo "$log_entry" >> "$temp_file" - fi - - mv "$temp_file" "$tracker_file" - echo "Goal tracker updated: Round $round, Reviewer=$reviewer, Found=$new_issues, Resolved=$new_resolved" >&2 - return 0 -} +# _LOOP_COMMON_DIR is set here instead of at the top of the file because +# loop-bg-tasks.sh lives in the same directory as this file and we want to +# locate it regardless of how loop-common.sh was sourced. +_LOOP_COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +# shellcheck source=loop-bg-tasks.sh +source "$_LOOP_COMMON_DIR/loop-bg-tasks.sh" diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh new file mode 100644 index 00000000..a95e81af --- /dev/null +++ b/hooks/lib/methodology-analysis.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash +# +# Methodology Analysis Phase library +# +# Provides functions for the methodology improvement analysis phase that runs +# before the RLCR loop truly exits. An independent Opus agent analyzes the +# development records from a pure methodology perspective and optionally helps +# the user file a GitHub issue with improvement suggestions. +# +# This library is sourced by loop-codex-stop-hook.sh. +# + +# Source guard: prevent double-sourcing +[[ -n "${_METHODOLOGY_ANALYSIS_LOADED:-}" ]] && return 0 2>/dev/null || true +_METHODOLOGY_ANALYSIS_LOADED=1 + +# Enter the methodology analysis phase +# +# Renames the current state file to methodology-analysis-state.md, records the +# exit reason, renders the analysis prompt, and outputs a block JSON response. 
+# +# Arguments: +# $1 - exit_reason: "complete", "stop", or "maxiter" +# $2 - exit_reason_description: human-readable explanation of why the loop is exiting +# +# Globals read: +# PRIVACY_MODE - "true" to skip analysis, "false" to proceed +# STATE_FILE - path to the current active state file +# LOOP_DIR - path to the loop directory +# CURRENT_ROUND - current round number +# MAX_ITERATIONS - max iterations setting +# TEMPLATE_DIR - template directory for prompt rendering +# +# Returns: +# 0 - analysis phase entered, block JSON has been output, caller should exit 0 +# 1 - analysis should be skipped (privacy on, already done, or re-entry) +# +enter_methodology_analysis_phase() { + local exit_reason="$1" + local exit_reason_description="$2" + + # Skip if privacy mode is on + if [[ "$PRIVACY_MODE" == "true" ]]; then + echo "Methodology analysis skipped (privacy mode enabled)" >&2 + return 1 + fi + + # Prevent re-entry: if methodology-analysis-state.md already exists, skip + if [[ -f "$LOOP_DIR/methodology-analysis-state.md" ]]; then + echo "Methodology analysis phase already active, skipping re-entry" >&2 + return 1 + fi + + # Skip if already completed in a previous attempt + if [[ -f "$LOOP_DIR/methodology-analysis-done.md" ]]; then + local done_content + done_content=$(cat "$LOOP_DIR/methodology-analysis-done.md" 2>/dev/null || echo "") + if [[ -n "$done_content" ]]; then + echo "Methodology analysis already completed, skipping" >&2 + return 1 + fi + fi + + # Rename current state file to methodology-analysis-state.md + mv "$STATE_FILE" "$LOOP_DIR/methodology-analysis-state.md" + echo "State file renamed to: $LOOP_DIR/methodology-analysis-state.md" >&2 + + # Record the original exit reason so the completion handler can finalize + echo "$exit_reason" > "$LOOP_DIR/.methodology-exit-reason" + + # Create empty placeholder for the completion artifact + touch "$LOOP_DIR/methodology-analysis-done.md" + + # Render prompt template + local fallback="# Methodology Analysis Phase 
+ +Please analyze the development records in $LOOP_DIR and provide methodology improvement suggestions. +Write your analysis to $LOOP_DIR/methodology-analysis-report.md. +When done, write a completion note to $LOOP_DIR/methodology-analysis-done.md." + + local analysis_prompt + analysis_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/methodology-analysis-prompt.md" "$fallback" \ + "LOOP_DIR=$LOOP_DIR" \ + "EXIT_REASON=$exit_reason" \ + "EXIT_REASON_DESCRIPTION=$exit_reason_description" \ + "CURRENT_ROUND=$CURRENT_ROUND" \ + "MAX_ITERATIONS=$MAX_ITERATIONS") + + # Output block JSON with the rendered prompt + jq -n \ + --arg reason "$analysis_prompt" \ + --arg msg "Loop: Methodology Analysis Phase - analyzing development methodology" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + + return 0 +} + +# Complete the methodology analysis phase +# +# Checks the completion artifact, reads the original exit reason, renames the +# state file to the appropriate terminal state, and cleans up marker files. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +# Returns: +# 0 - completion successful, caller should exit 0 (allow exit) +# 1 - incomplete (done marker missing/empty, report missing, or exit reason invalid) +# +complete_methodology_analysis() { + local done_file="$LOOP_DIR/methodology-analysis-done.md" + local report_file="$LOOP_DIR/methodology-analysis-report.md" + + # Check completion artifact has actual content (not just empty placeholder) + if [[ ! -f "$done_file" ]]; then + return 1 + fi + + local done_content + done_content=$(cat "$done_file" 2>/dev/null || echo "") + # Trim whitespace to reject whitespace-only markers + done_content="${done_content#"${done_content%%[![:space:]]*}"}" + if [[ -z "$done_content" ]]; then + return 1 + fi + + # Require the analysis report to exist with content (ensures the Opus agent + # actually produced an analysis, not just an empty/truncated file) + if [[ ! 
-f "$report_file" ]]; then + echo "Warning: methodology-analysis-report.md missing, blocking completion" >&2 + return 1 + fi + local report_content + report_content=$(cat "$report_file" 2>/dev/null || echo "") + report_content="${report_content#"${report_content%%[![:space:]]*}"}" + if [[ -z "$report_content" ]]; then + echo "Warning: methodology-analysis-report.md is empty, blocking completion" >&2 + return 1 + fi + + # Read exit reason (fail closed: missing marker blocks completion) + if [[ ! -f "$LOOP_DIR/.methodology-exit-reason" ]]; then + echo "Error: .methodology-exit-reason marker missing, cannot determine terminal state" >&2 + return 1 + fi + + local exit_reason + exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "") + exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + + # Validate exit reason (fail closed on invalid values) + case "$exit_reason" in + complete|stop|maxiter) + ;; + *) + echo "Error: Invalid methodology exit reason '$exit_reason', blocking completion" >&2 + return 1 + ;; + esac + + # Rename methodology-analysis-state.md to the terminal state + local target_name="${exit_reason}-state.md" + mv "$LOOP_DIR/methodology-analysis-state.md" "$LOOP_DIR/$target_name" + echo "Methodology analysis complete. State preserved as: $LOOP_DIR/$target_name" >&2 + + # Clean up marker file + rm -f "$LOOP_DIR/.methodology-exit-reason" + + return 0 +} + +# Block exit because methodology analysis is incomplete +# +# Outputs a block JSON instructing Claude to complete the analysis before exiting. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +block_methodology_analysis_incomplete() { + local done_file="$LOOP_DIR/methodology-analysis-done.md" + + local reason="# Methodology Analysis Incomplete + +Please complete the methodology analysis before exiting. + +You need to: +1. Spawn an Opus agent to analyze the development records +2. Review the analysis report +3. Optionally help the user file a GitHub issue +4. 
Write a completion note to: $done_file + +The completion marker file must contain actual content (not be empty) to signal that the analysis is done." + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Methodology Analysis Phase - please complete the analysis" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' +} diff --git a/hooks/lib/project-root.sh b/hooks/lib/project-root.sh new file mode 100644 index 00000000..cb23403a --- /dev/null +++ b/hooks/lib/project-root.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash +# +# Deterministic project-root resolver for all humanize hooks and scripts. +# +# Resolution priority: +# 1. CLAUDE_PROJECT_DIR (set by Claude Code, stable across `cd` within a session) +# 2. git rev-parse --show-toplevel (nearest enclosing repo) +# 3. Non-zero return. +# +# pwd is intentionally NOT used as a fallback: it drifts with `cd` +# invocations during a session and silently causes state.md lookups +# under .humanize/rlcr/ to miss the active loop directory. +# +# The resolved path is passed through realpath so symlinked prefixes +# (e.g. /Users/x vs /private/Users/x on macOS, or /var vs /private/var) +# do not diverge between setup-time and hook-time resolution. +# +# Path-comparison sites in validators must mirror this by canonicalizing +# the user-provided side as well; use the companion `canonicalize_path` +# helper below. +# + +if [[ -n "${_HUMANIZE_PROJECT_ROOT_SOURCED:-}" ]]; then + return 0 2>/dev/null || true +fi +_HUMANIZE_PROJECT_ROOT_SOURCED=1 + +# resolve_project_root +# +# Prints the resolved project root to stdout. Returns 0 on success, +# 1 when neither CLAUDE_PROJECT_DIR nor a git toplevel is available. 
+# +# Callers that must have a project root should handle the failure: +# +# PROJECT_ROOT="$(resolve_project_root)" || exit 0 # hook: allow natural stop +# PROJECT_ROOT="$(resolve_project_root)" || { # setup: hard error +# echo "Error: cannot determine humanize project root" >&2 +# exit 1 +# } +# +resolve_project_root() { + local root="${CLAUDE_PROJECT_DIR:-}" + if [[ -z "$root" ]]; then + root="$(git rev-parse --show-toplevel 2>/dev/null || true)" + fi + if [[ -z "$root" ]]; then + return 1 + fi + + local canonical + canonical=$(canonicalize_path "$root") + printf '%s\n' "${canonical:-$root}" +} + +# canonicalize_path_prefix +# +# Resolves symlinks ONLY in the parent directory and reattaches the +# original basename verbatim. This is the right helper for comparing +# user-supplied filenames against an expected path inside a known +# directory: a symlink at /tmp/alias pointing at /real/loop/state.md +# MUST NOT canonicalize to /real/loop/state.md for comparison purposes, +# because `mv` operates on the link path itself. Resolving only the +# parent still lets a symlinked project prefix (e.g. /var vs /private/var +# on macOS) match a canonical expected path. +# +# If realpath on the parent fails, falls back to returning the input +# path unchanged (prefix cannot be canonicalized -> caller's comparison +# will correctly fail against a canonical expected path). +# +# Empty input prints nothing and returns 0. 
+# +canonicalize_path_prefix() { + local path="$1" + if [[ -z "$path" ]]; then + return 0 + fi + + local parent base parent_real + parent=$(dirname -- "$path") + base=$(basename -- "$path") + + if parent_real=$(realpath "$parent" 2>/dev/null) && [[ -n "$parent_real" ]]; then + printf '%s/%s\n' "${parent_real%/}" "$base" + return 0 + fi + + if command -v python3 >/dev/null 2>&1; then + parent_real=$(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$parent" 2>/dev/null || true) + if [[ -n "$parent_real" ]]; then + printf '%s/%s\n' "${parent_real%/}" "$base" + return 0 + fi + fi + + printf '%s\n' "$path" +} + +# canonicalize_path +# +# Prints the realpath of the input path. If the path itself does not +# exist yet (common for write validation before the file is created), +# canonicalizes the parent directory and reattaches the basename. +# If realpath is unavailable and python3 is missing, prints the input +# path verbatim. +# +# SECURITY NOTE: This helper dereferences symlinks at the leaf when +# the leaf exists. Do NOT use it to authorize a user-supplied path +# against an expected filename -- use canonicalize_path_prefix instead, +# which only resolves the parent. +# +# Empty input prints nothing and returns 0. +# +canonicalize_path() { + local path="$1" + if [[ -z "$path" ]]; then + return 0 + fi + + local canonical="" + + if canonical=$(realpath "$path" 2>/dev/null) && [[ -n "$canonical" ]]; then + printf '%s\n' "$canonical" + return 0 + fi + + # Path does not exist: canonicalize parent, reattach basename. 
+ local parent base + parent=$(dirname -- "$path") + base=$(basename -- "$path") + if canonical=$(realpath "$parent" 2>/dev/null) && [[ -n "$canonical" ]]; then + printf '%s/%s\n' "${canonical%/}" "$base" + return 0 + fi + + if command -v python3 >/dev/null 2>&1; then + canonical=$(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$path" 2>/dev/null || true) + if [[ -n "$canonical" ]]; then + printf '%s\n' "$canonical" + return 0 + fi + fi + + printf '%s\n' "$path" +} diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7f46853d..13d29f6e 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Template loading functions for RLCR loop hooks # @@ -41,9 +41,7 @@ load_template() { if [[ -f "$template_path" ]]; then cat "$template_path" else - echo "" >&2 echo "Warning: Template not found: $template_path" >&2 - echo "" fi } @@ -71,6 +69,7 @@ render_template() { # Single-pass replacement using awk # Scans for {{VAR}} patterns and replaces them with values from environment # Replaced content goes directly to output without re-scanning + local awk_exit=0 content=$(env "${env_vars[@]}" awk ' BEGIN { # Build lookup table from environment variables with TMPL_VAR_ prefix @@ -126,7 +125,12 @@ render_template() { } print result - }' <<< "$content") + }' <<< "$content") || awk_exit=$? + + if [[ $awk_exit -ne 0 ]]; then + echo "Error: Template rendering failed (awk exit code: $awk_exit)" >&2 + return 1 + fi echo "$content" } @@ -148,22 +152,36 @@ load_and_render() { # Append content from another template file # Usage: append_template "$base_content" "$TEMPLATE_DIR" "claude/post-alignment.md" +# Only appends if the template exists and is non-empty. 
append_template() { local base_content="$1" local template_dir="$2" local template_name="$3" local additional_content - additional_content=$(load_template "$template_dir" "$template_name") + additional_content=$(load_template "$template_dir" "$template_name" 2>/dev/null) || true echo "$base_content" - echo "$additional_content" + if [[ -n "$additional_content" ]]; then + echo "$additional_content" + fi } # ======================================== # Safe versions with fallback messages # ======================================== +# Emit a fallback message, optionally rendering template variables. +_emit_fallback() { + local fallback_msg="$1" + shift + if [[ $# -gt 0 ]]; then + render_template "$fallback_msg" "$@" + else + echo "$fallback_msg" + fi +} + # Load and render with a fallback message if template fails # Usage: load_and_render_safe "$TEMPLATE_DIR" "block/message.md" "fallback message" "VAR=value" ... # Returns fallback message if template is missing or empty @@ -174,28 +192,18 @@ load_and_render_safe() { shift 3 local content - content=$(load_template "$template_dir" "$template_name" 2>/dev/null) + content=$(load_template "$template_dir" "$template_name" 2>/dev/null) || true if [[ -z "$content" ]]; then - # Template missing - use fallback with variable substitution - if [[ $# -gt 0 ]]; then - render_template "$fallback_msg" "$@" - else - echo "$fallback_msg" - fi + _emit_fallback "$fallback_msg" "$@" return fi local result - result=$(render_template "$content" "$@") + result=$(render_template "$content" "$@") || true if [[ -z "$result" ]]; then - # Rendering produced empty result - use fallback - if [[ $# -gt 0 ]]; then - render_template "$fallback_msg" "$@" - else - echo "$fallback_msg" - fi + _emit_fallback "$fallback_msg" "$@" return fi @@ -213,8 +221,16 @@ validate_template_dir() { return 1 fi - if [[ ! -d "$template_dir/block" ]] || [[ ! -d "$template_dir/codex" ]] || [[ ! 
-d "$template_dir/claude" ]]; then - echo "ERROR: Template directory missing subdirectories: $template_dir" >&2 + local required_subdirs=("block" "codex" "claude" "plan") + local missing=() + local subdir + for subdir in "${required_subdirs[@]}"; do + if [[ ! -d "$template_dir/$subdir" ]]; then + missing+=("$subdir") + fi + done + if [[ ${#missing[@]} -gt 0 ]]; then + echo "ERROR: Template directory missing subdirectories (${missing[*]}): $template_dir" >&2 return 1 fi diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 3d947ec2..ede35304 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -1,14 +1,12 @@ -#!/bin/bash +#!/usr/bin/env bash # -# PreToolUse Hook: Validate Bash commands for RLCR loop and PR loop +# PreToolUse Hook: Validate Bash commands for RLCR loop # # Blocks attempts to bypass Write/Edit hooks using shell commands: # - cat/echo/printf > file.md (redirection) # - tee file.md # - sed -i file.md (in-place edit) -# - goal-tracker.md modifications after Round 0 -# - PR loop state.md modifications -# - PR loop read-only file modifications (pr-comment, prompt, codex-prompt, etc.) 
+# - goal-tracker.md modifications via Bash # set -euo pipefail @@ -51,7 +49,7 @@ COMMAND_LOWER=$(to_lower "$COMMAND") # Find Active Loops (needed for multiple checks) # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 # Extract session_id from hook input for session-aware loop filtering HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") @@ -60,12 +58,120 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" ACTIVE_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") -# Check for active PR loop -PR_LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" -ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") +# ======================================== +# Methodology Analysis Phase Bash Restriction +# ======================================== +# During methodology analysis, block file-modifying bash commands. +# Only read-only operations and cancel-rlcr-loop.sh are allowed. +# This prevents source code modifications after Codex has signed off. +# +# Accepted limitations: +# - Read-only bash commands (cat, grep, find, etc.) are NOT blocked. Blocking +# them would break basic Claude operations. The analysis prompt directs Claude +# to derive user-facing content only from methodology-analysis-report.md. +# - Spawned agents (different session_id) are not restricted by hooks; their +# sanitization is enforced by the analysis prompt. This is an inherent +# limitation of the hook architecture which cannot distinguish spawned agents +# from unrelated sessions. +# +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. 
+_MA_BASH_DIR="$ACTIVE_LOOP_DIR" + +if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then + # Allow cancel-rlcr-loop.sh only as the leading command (not as an argument + # to another command like cp/mv). The optional path prefix must be a single + # token with no embedded whitespace, otherwise commands like + # `bash cancel-rlcr-loop.sh` or `tee cancel-rlcr-loop.sh` would match. + # The script name must be followed by whitespace or end-of-line so trailing + # tokens cannot hide additional arguments. + # + # Also reject any shell metacharacter that can inject or redirect work + # after the cancel invocation: pipes/sequence/background operators, + # command substitution ($(...) or backticks), redirection (<, >), and + # multi-line payloads. The earlier narrower check only rejected ; | &, + # letting payloads like `cancel-rlcr-loop.sh $(touch /tmp/pwn)` or a + # newline-delimited second command slip past this early exit and reach + # arbitrary file modifications before the downstream blockers run. + _ma_has_shell_meta=false + case "$COMMAND_LOWER" in + *';'*|*'|'*|*'&'*|*'`'*|*'>'*|*'<'*|*'$('*|*$'\n'*) + _ma_has_shell_meta=true + ;; + esac + if [[ "$_ma_has_shell_meta" != "true" ]] && \ + echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*"?([^[:space:]"]+/)?cancel-rlcr-loop\.sh"?([[:space:]]|$)'; then + exit 0 + fi + # Block git commands that modify the working tree + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv|switch|pull|clone|submodule|worktree)'; then + echo "# Bash Blocked During Methodology Analysis -# If no active loop of either type, allow all commands -if [[ -z "$ACTIVE_LOOP_DIR" ]] && [[ -z "$ACTIVE_PR_LOOP_DIR" ]]; then +Git write commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block file manipulation commands (touch, mv, cp, rm, mkdir, ln, patch, etc.) 
+ if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown|mkdir|rmdir|ln|mktemp|patch)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +File modification commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block in-place file editing tools + if echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then + echo "# Bash Blocked During Methodology Analysis + +In-place file editing is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block common interpreters that could write files (defense-in-depth) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(python[23]?|ruby|node|perl|php)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Running interpreters is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block shell script entry points (bash script.sh, sh script.sh, source, .) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(/usr/bin/env[[:space:]]+)?(bash|sh|zsh|/bin/bash|/bin/sh|/bin/zsh)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Running shell scripts is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block build tools that execute arbitrary commands + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(make|cmake|ninja|gradle|mvn|ant|cargo|go[[:space:]]+run|go[[:space:]]+generate|npm[[:space:]]+run|yarn[[:space:]]+run|npx|pnpm)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Build tools are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block source/dot commands (source script.sh, . 
script.sh) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(source|\.)[ ]+[^[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Sourcing scripts is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block direct script execution (./script.sh, ../script.sh, /path/to/script) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])\.{0,2}/[^[:space:]>|&;]*\.(sh|bash|py|rb|pl|js)'; then + echo "# Bash Blocked During Methodology Analysis + +Direct script execution is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block output redirection to files (catches cat > file, echo > file, etc.) + # Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > + _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') + if echo "$_ma_stripped" | grep -qE '[>]'; then + echo "# Bash Blocked During Methodology Analysis + +File redirection is not allowed during the methodology analysis phase." >&2 + exit 2 + fi +fi + +# If no active RLCR loop, allow all commands +if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 fi @@ -75,7 +181,7 @@ fi # Prevents Claude from manually running stop hook or stop gate scripts. # These scripts should only be invoked by the hooks system, not via Bash. -BLOCKED_HOOK_SCRIPTS="(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh|rlcr-stop-gate\.sh)" +BLOCKED_HOOK_SCRIPTS="(loop-codex-stop-hook\.sh|rlcr-stop-gate\.sh)" HOOK_ASSIGNMENT_PREFIX="[[:alpha:]_][[:alnum:]_]*=[^[:space:];&|]+" HOOK_COMMAND_PREFIX="command([[:space:]]+(-[^[:space:];&|]+|--))*" HOOK_ENV_PREFIX="env([[:space:]]+(-[^[:space:];&|]+|--|${HOOK_ASSIGNMENT_PREFIX}))*" @@ -162,6 +268,15 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then # 1. command_modifies_file checks if DESTINATION contains state.md # 2. 
Additional check below catches if SOURCE contains state.md (e.g., mv state.md /tmp/foo) +if command_modifies_file "$COMMAND_LOWER" "methodology-analysis-state\.md"; then + # Check for cancel signal file - allow authorized cancel operation + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi + if command_modifies_file "$COMMAND_LOWER" "finalize-state\.md"; then # Check for cancel signal file - allow authorized cancel operation if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then @@ -196,6 +311,7 @@ fi # This catches chained commands like: true; mv state.md /tmp/foo MV_CP_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']state\.md" MV_CP_FINALIZE_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']finalize-state\.md" +MV_CP_METHODOLOGY_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']methodology-analysis-state\.md" # Replace shell operators with newlines, then check each segment # Order matters: |& before |, && before single & @@ -309,7 +425,17 @@ while IFS= read -r SEGMENT; do t again ') - # Check for finalize-state.md as SOURCE first (more specific pattern) + # Check for methodology-analysis-state.md as SOURCE first (most specific pattern) + if echo "$SEGMENT_CLEANED" | grep -qE "$MV_CP_METHODOLOGY_SOURCE_PATTERN"; then + # Check for cancel signal file - allow authorized cancel operation + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + 
methodology_analysis_state_file_blocked_message >&2 + exit 2 + fi + + # Check for finalize-state.md as SOURCE (more specific than state.md) if echo "$SEGMENT_CLEANED" | grep -qE "$MV_CP_FINALIZE_SOURCE_PATTERN"; then # Check for cancel signal file - allow authorized cancel operation if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then @@ -333,6 +459,14 @@ done <<< "$COMMAND_SEGMENTS" # This catches bypass attempts like: sh -c 'mv state.md /tmp/foo' # Pattern: look for sh/bash with -c flag and state.md or finalize-state.md in the payload if echo "$COMMAND_LOWER" | grep -qE "(^|[[:space:]/])(sh|bash)[[:space:]]+-c[[:space:]]"; then + # Shell wrapper detected - check if payload contains mv/cp methodology-analysis-state.md (most specific) + if echo "$COMMAND_LOWER" | grep -qE "(mv|cp)[[:space:]].*methodology-analysis-state\.md"; then + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 + fi # Shell wrapper detected - check if payload contains mv/cp finalize-state.md (check first, more specific) if echo "$COMMAND_LOWER" | grep -qE "(mv|cp)[[:space:]].*finalize-state\.md"; then # Check for cancel signal file - allow authorized cancel operation @@ -373,12 +507,11 @@ fi # Round > 0: prompt to put request in summary if command_modifies_file "$COMMAND_LOWER" "goal-tracker\.md"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" if [[ "$CURRENT_ROUND" -eq 0 ]]; then - GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" goal_tracker_bash_blocked_message "$GOAL_TRACKER_PATH" >&2 else - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 fi exit 2 fi @@ -404,6 +537,23 @@ if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-summary\.md"; then exit 2 fi +# ======================================== +# Block Round 
Contract File Modifications (All Rounds) +# ======================================== +# Round contracts should be written using Write or Edit tools so round scoping +# stays aligned with the current loop state. + +if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-contract\.md"; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-contract.md" + FALLBACK="# Round Contract Bash Write Blocked + +Do not use Bash commands to modify round contract files. +Use the Write or Edit tool instead: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/round-contract-bash-write.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 +fi + # ======================================== # Block Todos File Modifications (All Rounds) # ======================================== @@ -420,52 +570,4 @@ fi fi # End of RLCR-specific checks -# ======================================== -# PR Loop File Protection -# ======================================== -# Block modifications to PR loop state and read-only files -# Note: ACTIVE_PR_LOOP_DIR was already set at the top of the script - -if [[ -n "$ACTIVE_PR_LOOP_DIR" ]]; then - # Block PR loop state.md modifications - # Check both full path pattern AND bare filename to catch relative path bypass - # (e.g., cd .humanize/pr-loop/timestamp && sed -i state.md) - if command_modifies_file "$COMMAND_LOWER" "\.humanize/pr-loop(/[^/]+)?/state\.md"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - # Bare filename check for state.md (catches relative path usage) - if command_modifies_file "$COMMAND_LOWER" "state\.md"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block PR loop read-only files: - # - round-N-pr-comment.md (fetched comments) - # - round-N-prompt.md (prompts from system) - # - round-N-codex-prompt.md (Codex prompts) - # - round-N-pr-check.md (Codex output) - # - round-N-pr-feedback.md (feedback for next round) - PR_LOOP_READONLY_PATTERNS=( - "round-[0-9]+-pr-comment\.md" - 
"round-[0-9]+-prompt\.md" - "round-[0-9]+-codex-prompt\.md" - "round-[0-9]+-pr-check\.md" - "round-[0-9]+-pr-feedback\.md" - ) - - for pattern in "${PR_LOOP_READONLY_PATTERNS[@]}"; do - # Check both full path pattern AND bare filename to catch relative path bypass - if command_modifies_file "$COMMAND_LOWER" "\.humanize/pr-loop(/[^/]+)?/${pattern}"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - # Bare filename check (catches relative path usage from within loop dir) - if command_modifies_file "$COMMAND_LOWER" "${pattern}"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - done -fi - exit 0 diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 4d397a19..0c191d4c 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Stop Hook for RLCR loop # @@ -39,17 +39,20 @@ HOOK_INPUT=$(cat) # Find Active Loop # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" - # Source shared loop functions and template loader SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/lib/loop-common.sh" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 +LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" + # Source portable timeout wrapper for git operations PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" source "$PLUGIN_ROOT/scripts/portable-timeout.sh" +# Source methodology analysis library +source "$SCRIPT_DIR/lib/methodology-analysis.sh" + # Default timeout for git operations (30 seconds) GIT_TIMEOUT=30 @@ -58,13 +61,27 @@ GIT_TIMEOUT=30 # Extract session_id from hook input for session-aware loop filtering HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") -LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") +LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID" true) # If no active loop (or session_id mismatch), allow exit if [[ -z "$LOOP_DIR" ]]; then exit 0 fi +# ======================================== +# Background-Task Guards +# ======================================== +# Delegates to handle_bg_task_short_circuit (hooks/lib/loop-bg-tasks.sh), +# which runs four cohesive guards in order: +# 1. Ambiguous-caller marker guard (no session_id + marker present) +# 2. Cross-session parked-loop guard (foreign session walking in) +# 3. Pending-bg short-circuit (this session has async work in flight) +# 4. Same-session stale-marker cleanup (bg work just finished) +# When any guard short-circuits, it emits the appropriate JSON on stdout +# and `exit 0`s directly; we never return from that call. When no guard +# fires we continue into the normal gate logic below. 
+handle_bg_task_short_circuit "$LOOP_DIR" "$HOOK_INPUT" "$HOOK_SESSION_ID" + # ======================================== # Detect Loop Phase: Normal or Finalize # ======================================== @@ -80,6 +97,9 @@ fi IS_FINALIZE_PHASE=false [[ "$STATE_FILE" == *"/finalize-state.md" ]] && IS_FINALIZE_PHASE=true +IS_METHODOLOGY_ANALYSIS_PHASE=false +[[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]] && IS_METHODOLOGY_ANALYSIS_PHASE=true + # ======================================== # Parse State File (using shared function) # ======================================== @@ -120,6 +140,7 @@ CODEX_REVIEW_EFFORT="high" CODEX_TIMEOUT="${STATE_CODEX_TIMEOUT:-${CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}}" ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-false}" AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" +PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" BITLESSON_REQUIRED="false" if [[ -n "$RAW_BITLESSON_REQUIRED" ]]; then BITLESSON_REQUIRED=$(echo "$RAW_BITLESSON_REQUIRED" | sed 's/^bitlesson_required:[[:space:]]*//' | tr -d ' "') @@ -145,6 +166,9 @@ fi if [[ "$BITLESSON_ALLOW_EMPTY_NONE" != "true" && "$BITLESSON_ALLOW_EMPTY_NONE" != "false" ]]; then BITLESSON_ALLOW_EMPTY_NONE="true" fi +MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" +LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" +DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" # Re-validate Codex Model and Effort for YAML safety (in case state.md was manually edited) # Use same validation patterns as setup-rlcr-loop.sh if [[ ! "$CODEX_EXEC_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then @@ -186,6 +210,13 @@ if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then MAX_ITERATIONS=42 fi +if [[ ! 
"$MAINLINE_STALL_COUNT" =~ ^[0-9]+$ ]]; then + echo "Warning: Invalid mainline_stall_count '$MAINLINE_STALL_COUNT', defaulting to 0" >&2 + MAINLINE_STALL_COUNT=0 +fi +LAST_MAINLINE_VERDICT=$(normalize_mainline_progress_verdict "$LAST_MAINLINE_VERDICT") +DRIFT_STATUS=$(normalize_drift_status "$DRIFT_STATUS") + # ======================================== # Quick-check 0: Schema Validation (v1.1.2+ fields) # ======================================== @@ -572,6 +603,66 @@ Split these into smaller modules before continuing." fi fi +# ======================================== +# Methodology Analysis Phase Completion Handler +# ======================================== +# When in methodology analysis phase, check if the analysis is done. +# If done, rename state to the original exit reason's terminal state. +# If not done, block and ask Claude to complete the analysis. +# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. +# IMPORTANT: This MUST run before the git-clean check, because methodology +# artifacts (.humanize/rlcr/...) may make the working tree appear dirty +# if .humanize is tracked, which would block exit before reaching this handler. + +if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then + if complete_methodology_analysis; then + # Before allowing the terminal state transition, re-verify the + # working tree is clean. The main git-clean gate below is skipped + # in the methodology branch, so without this check, tracked edits + # made during the analysis phase (e.g. post-signoff source + # modifications) could slip through unreviewed as soon as the + # completion marker appears. + # + # Apply the same .humanize/ untracked exclusion the main gate uses + # so methodology-artifact writes under .humanize/rlcr/... do not + # themselves trip the check. + if [[ "$GIT_IS_REPO" == "true" ]]; then + HUMANIZE_UNTRACKED_PATTERN='^\?\? 
\.humanize[-/]' + GIT_STATUS_FOR_BLOCK=$(echo "$GIT_STATUS_CACHED" | grep -vE "$HUMANIZE_UNTRACKED_PATTERN" || true) + if [[ -n "$GIT_STATUS_FOR_BLOCK" ]]; then + cleanup_stale_index_lock + FALLBACK="# Git Not Clean + +Methodology analysis is complete, but the working tree still has uncommitted changes: + +{{GIT_ISSUES}} + +Please commit all changes before allowing the loop to exit. +{{SPECIAL_NOTES}}" + REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/git-not-clean.md" "$FALLBACK" \ + "GIT_ISSUES=uncommitted changes after methodology analysis" \ + "SPECIAL_NOTES=") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - uncommitted changes detected after methodology analysis, please commit first" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi + fi + # Analysis complete and tree clean, allow exit + exit 0 + else + # Analysis not yet complete, block + block_methodology_analysis_incomplete + exit 0 + fi +fi + # ======================================== # Quick Check: Git Clean and Pushed? # ======================================== @@ -583,6 +674,21 @@ if [[ "$GIT_IS_REPO" == "true" ]]; then GIT_ISSUES="" SPECIAL_NOTES="" + if git_has_tracked_humanize_state "$PROJECT_ROOT"; then + cleanup_stale_index_lock + REASON=$(git_tracked_humanize_blocked_message) + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - tracked Humanize state detected, remove it from git first" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi + # Check for uncommitted changes (staged or unstaged) using cached status. 
# Exclude untracked .humanize/ paths and .humanize-* dash-separated legacy # variants from the dirty determination because local plugin state under @@ -682,8 +788,10 @@ fi # In Finalize Phase, expect finalize-summary.md instead of round-N-summary.md if [[ "$IS_FINALIZE_PHASE" == "true" ]]; then SUMMARY_FILE="$LOOP_DIR/finalize-summary.md" + ROUND_CONTRACT_FILE="" else SUMMARY_FILE="$LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + ROUND_CONTRACT_FILE="$LOOP_DIR/round-${CURRENT_ROUND}-contract.md" fi if [[ ! -f "$SUMMARY_FILE" ]]; then @@ -713,6 +821,39 @@ Please write your work summary to: {{SUMMARY_FILE}}" exit 0 fi +# Check Round Contract Exists +# ======================================== + +# Only enforce round contract when anti-drift is active (drift_status present in raw state). +# Legacy loops that pre-date the anti-drift feature will not have this field. +RAW_DRIFT_STATUS=$(echo "$RAW_FRONTMATTER" | grep "^drift_status:" || true) +if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ -n "$RAW_DRIFT_STATUS" ]]; then + if [[ ! 
-f "$ROUND_CONTRACT_FILE" ]]; then + FALLBACK="# Round Contract Missing + +Before trying to exit, write the current round contract to: {{ROUND_CONTRACT_FILE}} + +The round contract must restate: +- The single mainline objective for this round +- The target ACs +- Which side issues are truly blocking +- Which side issues are queued and out of scope +- The success criteria for this round" + REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/round-contract-missing.md" "$FALLBACK" \ + "ROUND_CONTRACT_FILE=$ROUND_CONTRACT_FILE") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Round contract missing for round $CURRENT_ROUND" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi +fi + # ======================================== # Check BitLesson Delta Section (all non-finalize rounds) # ======================================== @@ -742,7 +883,7 @@ GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" # Skip this check in Finalize Phase, Review Phase, or when review_started is already true (skip-impl mode) # - Finalize Phase: goal tracker was already initialized before COMPLETE -# - Review Phase (review_started=true): skip-impl mode skips implementation, no goal tracker needed +# - Review Phase: later rounds may update only the mutable section, so Round 0 placeholder checks no longer apply if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ "$REVIEW_STARTED" != "true" ]] && [[ "$CURRENT_ROUND" -eq 0 ]] && [[ -f "$GOAL_TRACKER_FILE" ]]; then # Check if goal-tracker.md still contains placeholder text # Extract each section and check for generic placeholder pattern within that section @@ -823,6 +964,10 @@ NEXT_ROUND=$((CURRENT_ROUND + 1)) # - Review Phase: must continue until [P?] issues are cleared, regardless of iteration count if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ "$REVIEW_STARTED" != "true" ]] && [[ $NEXT_ROUND -gt $MAX_ITERATIONS ]]; then echo "RLCR loop did not complete, but reached max iterations ($MAX_ITERATIONS). Exiting." 
>&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "maxiter" "Reached max iterations ($MAX_ITERATIONS) without completion"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_MAXITER" exit 0 fi @@ -834,8 +979,12 @@ fi # No Codex review is performed - this is the final step after Codex already confirmed COMPLETE if [[ "$IS_FINALIZE_PHASE" == "true" ]]; then - echo "Finalize Phase complete. All checks passed. Loop finished!" >&2 - # Rename finalize-state.md to complete-state.md + echo "Finalize Phase complete. All checks passed." >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "complete" "All acceptance criteria met and code review passed"; then + exit 0 + fi + # Methodology analysis skipped or already done - proceed with normal exit mv "$STATE_FILE" "$LOOP_DIR/complete-state.md" echo "State preserved as: $LOOP_DIR/complete-state.md" >&2 exit 0 @@ -883,6 +1032,37 @@ COMPLETED_ITERATIONS=$((CURRENT_ROUND + 1)) PREV_ROUND=$(( CURRENT_ROUND > 0 ? CURRENT_ROUND - 1 : 0 )) PREV_PREV_ROUND=$(( CURRENT_ROUND > 1 ? 
CURRENT_ROUND - 2 : 0 )) +# Integral component: accumulated commit history and recent round references +# Validate BASE_COMMIT is an ancestor of HEAD (not just a valid object) before using it in git log +if [[ -n "$BASE_COMMIT" ]] && git -C "$PROJECT_ROOT" merge-base --is-ancestor "$BASE_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY=$(git -C "$PROJECT_ROOT" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +else + COMMIT_HISTORY=$(git -C "$PROJECT_ROOT" log --oneline --no-decorate --reverse -30 2>/dev/null) + # Annotate so Codex knows this is not the full loop history + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +COMMIT_HISTORY_SECTION_FALLBACK="## Development History (Integral Context) +\`\`\` +${COMMIT_HISTORY} +\`\`\` +### Recent Round Files +Read these files before conducting your review to understand the trajectory of work: +${RECENT_ROUND_FILES}" +COMMIT_HISTORY_SECTION=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "$COMMIT_HISTORY_SECTION_FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + # Build the review prompt FULL_ALIGNMENT_FALLBACK="# Full Alignment Review (Round {{CURRENT_ROUND}}) @@ -891,6 +1071,8 @@ Review Claude's work against the plan and goal tracker. Check all goals are bein ## Claude's Summary {{SUMMARY_CONTENT}} +{{COMMIT_HISTORY_SECTION}} + {{GOAL_TRACKER_UPDATE_SECTION}} Write your review to {{REVIEW_RESULT_FILE}}. 
End with COMPLETE if done, or list issues." @@ -902,6 +1084,8 @@ Review Claude's work for this round. ## Claude's Summary {{SUMMARY_CONTENT}} +{{COMMIT_HISTORY_SECTION}} + {{GOAL_TRACKER_UPDATE_SECTION}} Write your review to {{REVIEW_RESULT_FILE}}. End with COMPLETE if done, or list issues." @@ -915,6 +1099,7 @@ if [[ "$FULL_ALIGNMENT_CHECK" == "true" ]]; then "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ "DOCS_PATH=$DOCS_PATH" \ "GOAL_TRACKER_UPDATE_SECTION=$GOAL_TRACKER_UPDATE_SECTION" \ + "COMMIT_HISTORY_SECTION=$COMMIT_HISTORY_SECTION" \ "COMPLETED_ITERATIONS=$COMPLETED_ITERATIONS" \ "LOOP_TIMESTAMP=$LOOP_TIMESTAMP" \ "PREV_ROUND=$PREV_ROUND" \ @@ -931,6 +1116,7 @@ else "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ "DOCS_PATH=$DOCS_PATH" \ "GOAL_TRACKER_UPDATE_SECTION=$GOAL_TRACKER_UPDATE_SECTION" \ + "COMMIT_HISTORY_SECTION=$COMMIT_HISTORY_SECTION" \ "COMPLETED_ITERATIONS=$COMPLETED_ITERATIONS" \ "LOOP_TIMESTAMP=$LOOP_TIMESTAMP" \ "PREV_ROUND=$PREV_ROUND" \ @@ -980,6 +1166,20 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above +# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. +# Probe whether the installed Codex CLI supports --disable; cache the result per loop +# so older builds do not fail with an unknown-argument error. 
+CODEX_DISABLE_HOOKS_ARGS=() +_CODEX_FEATURE_CACHE="$CACHE_DIR/.codex-disable-hooks-supported" +if [[ -f "$_CODEX_FEATURE_CACHE" ]]; then + [[ "$(cat "$_CODEX_FEATURE_CACHE")" == "yes" ]] && CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +elif codex --help 2>&1 | grep -q -- '--disable'; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + echo "yes" > "$_CODEX_FEATURE_CACHE" 2>/dev/null +else + echo "no" > "$_CODEX_FEATURE_CACHE" 2>/dev/null +fi + # Build command arguments for summary review (codex exec) CODEX_EXEC_ARGS=("-m" "$CODEX_EXEC_MODEL") if [[ -n "$CODEX_EXEC_EFFORT" ]]; then @@ -1056,14 +1256,14 @@ Provider: codex echo "# Review base ($review_base_type): $review_base" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex review --base $review_base ${CODEX_REVIEW_ARGS[*]}" + echo "codex review ${CODEX_DISABLE_HOOKS_ARGS[*]} --base $review_base ${CODEX_REVIEW_ARGS[*]}" } > "$CODEX_REVIEW_CMD_FILE" echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2 echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." >&2 CODEX_REVIEW_EXIT_CODE=0 - (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex review --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ + (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex review "${CODEX_DISABLE_HOOKS_ARGS[@]}" --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$? echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2 @@ -1218,6 +1418,79 @@ Follow the plan's per-task routing tags strictly: ROUTING_EOF } +# Stop the loop when mainline progress has stalled for too many consecutive rounds. 
+# Arguments: $1=stall_count, $2=last_verdict +stop_for_mainline_drift() { + local stall_count="$1" + local last_verdict="$2" + + upsert_state_fields "$STATE_FILE" \ + "${FIELD_MAINLINE_STALL_COUNT}=${stall_count}" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${last_verdict}" \ + "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_REPLAN_REQUIRED}" + + local fallback="# Mainline Drift Circuit Breaker + +The RLCR loop has been stopped because the mainline failed to advance for {{STALL_COUNT}} consecutive implementation rounds. + +- Last mainline verdict: {{LAST_VERDICT}} +- Drift status: replan_required + +This loop should not continue automatically. Revisit the original plan, recover the round contract, and restart with a narrower mainline objective." + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-drift-stop.md" "$fallback" \ + "STALL_COUNT=$stall_count" \ + "LAST_VERDICT=$last_verdict" \ + "PLAN_FILE=$PLAN_FILE") + + end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP" + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Stopped - mainline drift circuit breaker triggered" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + +# Block exit when implementation review output omits the required mainline verdict. +# Arguments: $1=review_result_file, $2=review_prompt_file +block_missing_mainline_verdict() { + local review_result_file="$1" + local review_prompt_file="$2" + + local fallback="# Mainline Verdict Missing + +The implementation review output is missing the required line: + +\`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED\` + +Humanize cannot safely update drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. 
+ +Files: +- Review result: {{REVIEW_RESULT_FILE}} +- Review prompt: {{REVIEW_PROMPT_FILE}}" + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-verdict-missing.md" "$fallback" \ + "REVIEW_RESULT_FILE=$review_result_file" \ + "REVIEW_PROMPT_FILE=$review_prompt_file") + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Blocked - implementation review missing Mainline Progress Verdict" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + # Continue review loop when issues are found # Arguments: $1=round_number, $2=review_content continue_review_loop_with_issues() { @@ -1256,6 +1529,7 @@ continue_review_loop_with_issues() { - Notes: [what changed and why] EOF fi + local next_contract_file="$LOOP_DIR/round-${round}-contract.md" local fallback="# Code Review Findings @@ -1267,14 +1541,35 @@ You are in the **Review Phase** of the RLCR loop. Codex has performed a code rev ## Instructions -1. Address all issues marked with [P0-9] severity markers -2. Focus on fixes only - do not add new features -3. Commit your changes after fixing the issues -4. Write your summary to: {{SUMMARY_FILE}}" +1. Re-anchor on the original plan and current goal tracker before changing code +2. Refresh the round contract at {{ROUND_CONTRACT_FILE}} +3. Address only the issues that are truly blocking the current mainline objective or code-review acceptance +4. Record non-blocking follow-up items as queued, not as the main goal +5. Commit your changes after fixing the issues +6. 
Write your summary to: {{SUMMARY_FILE}}" load_and_render_safe "$TEMPLATE_DIR" "claude/review-phase-prompt.md" "$fallback" \ "REVIEW_CONTENT=$review_content" \ - "SUMMARY_FILE=$next_summary_file" > "$next_prompt_file" + "SUMMARY_FILE=$next_summary_file" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "PLAN_FILE=$PLAN_FILE" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "ROUND_CONTRACT_FILE=$next_contract_file" \ + "CURRENT_ROUND=$round" > "$next_prompt_file" + if [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$next_prompt_file"; then + cat >> "$next_prompt_file" << EOF + +## BitLesson Selection (REQUIRED FOR EACH FIX TASK) + +Before implementing each fix task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each fix task/sub-task to select relevant lesson IDs +3. Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF + fi append_task_tag_routing_note "$next_prompt_file" jq -n \ @@ -1387,7 +1682,7 @@ CODEX_PROMPT_CONTENT=$(cat "$REVIEW_PROMPT_FILE") echo "# Working directory: $PROJECT_ROOT" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex exec ${CODEX_EXEC_ARGS[*]} \"<prompt>\"" + echo "codex exec ${CODEX_DISABLE_HOOKS_ARGS[*]} ${CODEX_EXEC_ARGS[*]} \"<prompt>\"" echo "" echo "# Prompt content:" echo "$CODEX_PROMPT_CONTENT" @@ -1397,7 +1692,7 @@ echo "Codex command saved to: $CODEX_CMD_FILE" >&2 echo "Running summary review with timeout ${CODEX_TIMEOUT}s..." >&2 CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_DISABLE_HOOKS_ARGS[@]}" "${CODEX_EXEC_ARGS[@]}" - \ > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$? 
echo "Codex exit code: $CODEX_EXIT_CODE" >&2 @@ -1519,6 +1814,53 @@ REVIEW_CONTENT=$(cat "$REVIEW_RESULT_FILE") LAST_LINE=$(echo "$REVIEW_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') +NEXT_MAINLINE_STALL_COUNT="$MAINLINE_STALL_COUNT" +NEXT_LAST_MAINLINE_VERDICT="$LAST_MAINLINE_VERDICT" +NEXT_DRIFT_STATUS="$DRIFT_STATUS" +DRIFT_REPLAN_REQUIRED=false +MAINLINE_DRIFT_STOP=false + +if [[ "$REVIEW_STARTED" != "true" ]]; then + EXTRACTED_MAINLINE_VERDICT=$(extract_mainline_progress_verdict "$REVIEW_CONTENT") + + if [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$EXTRACTED_MAINLINE_VERDICT" == "$MAINLINE_VERDICT_UNKNOWN" ]]; then + echo "Implementation review output is missing Mainline Progress Verdict. Blocking exit for safety." >&2 + block_missing_mainline_verdict "$REVIEW_RESULT_FILE" "$REVIEW_PROMPT_FILE" + fi + + case "$EXTRACTED_MAINLINE_VERDICT" in + "$MAINLINE_VERDICT_ADVANCED") + NEXT_MAINLINE_STALL_COUNT=0 + NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED" + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + ;; + "$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED") + NEXT_MAINLINE_STALL_COUNT=$((MAINLINE_STALL_COUNT + 1)) + NEXT_LAST_MAINLINE_VERDICT="$EXTRACTED_MAINLINE_VERDICT" + if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 2 ]]; then + NEXT_DRIFT_STATUS="$DRIFT_STATUS_REPLAN_REQUIRED" + DRIFT_REPLAN_REQUIRED=true + else + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + fi + if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 3 ]]; then + MAINLINE_DRIFT_STOP=true + fi + ;; + *) + : + ;; + esac + + if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then + NEXT_MAINLINE_STALL_COUNT=0 + NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED" + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + DRIFT_REPLAN_REQUIRED=false + MAINLINE_DRIFT_STOP=false + fi +fi + # Handle COMPLETE - enter Review Phase or Finalize Phase if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then # In review phase, COMPLETE 
signal is ignored - only absence of [P0-9] triggers finalize @@ -1530,6 +1872,9 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then # Max iterations check if [[ $CURRENT_ROUND -ge $MAX_ITERATIONS ]]; then echo "Codex review passed but at max iterations ($MAX_ITERATIONS). Terminating as MAXITER." >&2 + if enter_methodology_analysis_phase "maxiter" "Codex confirmed COMPLETE but at max iterations ($MAX_ITERATIONS)"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_MAXITER" exit 0 fi @@ -1546,10 +1891,12 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then else echo "Implementation complete. Entering Review Phase..." >&2 - # Update state to indicate review phase has started - TEMP_FILE="${STATE_FILE}.tmp.$$" - sed "s/^review_started: .*/review_started: true/" "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" + # Update state to indicate review phase has started and clear drift counters. + upsert_state_fields "$STATE_FILE" \ + "${FIELD_REVIEW_STARTED}=true" \ + "${FIELD_MAINLINE_STALL_COUNT}=0" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${MAINLINE_VERDICT_ADVANCED}" \ + "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_NORMAL}" REVIEW_STARTED="true" # Create marker file to validate review phase was properly entered @@ -1597,6 +1944,11 @@ Use \`/humanize:cancel-rlcr-loop\` to end this loop." run_and_handle_code_review "$((CURRENT_ROUND + 1))" "Loop: Finalize Phase - Code review passed" fi +if [[ "$MAINLINE_DRIFT_STOP" == "true" ]] && [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$LAST_LINE_TRIMMED" != "$MARKER_COMPLETE" ]]; then + echo "Mainline progress stalled for $NEXT_MAINLINE_STALL_COUNT consecutive rounds. Triggering drift circuit breaker." 
>&2 + stop_for_mainline_drift "$NEXT_MAINLINE_STALL_COUNT" "$NEXT_LAST_MAINLINE_VERDICT" +fi + # Handle STOP - circuit breaker triggered if [[ "$LAST_LINE_TRIMMED" == "$MARKER_STOP" ]]; then echo "" >&2 @@ -1623,6 +1975,10 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_STOP" ]]; then echo " $REVIEW_RESULT_FILE" >&2 fi echo "========================================" >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "stop" "Circuit breaker triggered - stagnation detected at round $CURRENT_ROUND"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP" exit 0 fi @@ -1632,9 +1988,11 @@ fi # ======================================== # Update state file for next round -TEMP_FILE="${STATE_FILE}.tmp.$$" -sed "s/^current_round: .*/current_round: $NEXT_ROUND/" "$STATE_FILE" > "$TEMP_FILE" -mv "$TEMP_FILE" "$STATE_FILE" +upsert_state_fields "$STATE_FILE" \ + "${FIELD_CURRENT_ROUND}=${NEXT_ROUND}" \ + "${FIELD_MAINLINE_STALL_COUNT}=${NEXT_MAINLINE_STALL_COUNT}" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${NEXT_LAST_MAINLINE_VERDICT}" \ + "${FIELD_DRIFT_STATUS}=${NEXT_DRIFT_STATUS}" # Create next round prompt NEXT_PROMPT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-prompt.md" @@ -1661,6 +2019,7 @@ if [[ ! 
-f "$NEXT_SUMMARY_FILE" ]]; then - Notes: [what changed and why] EOF fi +NEXT_CONTRACT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-contract.md" # Build the next round prompt from templates NEXT_ROUND_FALLBACK="# Next Round Instructions @@ -1675,12 +2034,60 @@ Before executing tasks in this round: ## Codex Review {{REVIEW_CONTENT}} -Reference: {{PLAN_FILE}}, {{GOAL_TRACKER_FILE}}, {{BITLESSON_FILE}}" -load_and_render_safe "$TEMPLATE_DIR" "claude/next-round-prompt.md" "$NEXT_ROUND_FALLBACK" \ - "PLAN_FILE=$PLAN_FILE" \ - "REVIEW_CONTENT=$REVIEW_CONTENT" \ - "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ - "BITLESSON_FILE=$BITLESSON_FILE" > "$NEXT_PROMPT_FILE" +Reference: {{PLAN_FILE}}, {{GOAL_TRACKER_FILE}}, {{ROUND_CONTRACT_FILE}}, {{BITLESSON_FILE}}" +DRIFT_REPLAN_FALLBACK="# Drift Recovery Required + +The mainline has not advanced for {{STALL_COUNT}} consecutive implementation rounds. + +Last mainline verdict: {{LAST_MAINLINE_VERDICT}} + +Before writing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the recent round summaries and review results +- Rewrite @{{ROUND_CONTRACT_FILE}} with a recovery-focused mainline objective + +Do not spend this round clearing queued work. Recover mainline progress first. 
+ +## Codex Review +{{REVIEW_CONTENT}}" + +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]]; then + load_and_render_safe "$TEMPLATE_DIR" "claude/drift-replan-prompt.md" "$DRIFT_REPLAN_FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "REVIEW_CONTENT=$REVIEW_CONTENT" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "ROUND_CONTRACT_FILE=$NEXT_CONTRACT_FILE" \ + "CURRENT_ROUND=$NEXT_ROUND" \ + "STALL_COUNT=$NEXT_MAINLINE_STALL_COUNT" \ + "LAST_MAINLINE_VERDICT=$NEXT_LAST_MAINLINE_VERDICT" > "$NEXT_PROMPT_FILE" +else + load_and_render_safe "$TEMPLATE_DIR" "claude/next-round-prompt.md" "$NEXT_ROUND_FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "REVIEW_CONTENT=$REVIEW_CONTENT" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "ROUND_CONTRACT_FILE=$NEXT_CONTRACT_FILE" \ + "CURRENT_ROUND=$NEXT_ROUND" \ + "STALL_COUNT=$NEXT_MAINLINE_STALL_COUNT" \ + "LAST_MAINLINE_VERDICT=$NEXT_LAST_MAINLINE_VERDICT" > "$NEXT_PROMPT_FILE" +fi + +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]] && [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$NEXT_PROMPT_FILE"; then + cat >> "$NEXT_PROMPT_FILE" << EOF + +## BitLesson Selection (REQUIRED FOR EACH TASK) + +Before executing each task or sub-task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each task/sub-task to select relevant lesson IDs +3. Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF +fi if [[ "$AGENT_TEAMS" == "true" ]]; then ENFORCEMENT_BLOCK="**Delegation Warning**: Do NOT implement code yourself in Agent Teams mode; delegate all coding tasks to team members." 
@@ -1797,6 +2204,9 @@ fi # Build system message SYSTEM_MSG="Loop: Round $NEXT_ROUND/$MAX_ITERATIONS - Codex found issues to address" +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]]; then + SYSTEM_MSG="Loop: Round $NEXT_ROUND/$MAX_ITERATIONS - Mainline drift detected, replan required" +fi # Block exit and send review feedback jq -n \ diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..fb9f8e1b 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -1,14 +1,13 @@ -#!/bin/bash +#!/usr/bin/env bash # -# PreToolUse Hook: Validate Edit paths for RLCR loop and PR loop +# PreToolUse Hook: Validate Edit paths for RLCR loop # # Blocks Claude from editing: # - Todos files (should use native Task tools instead) # - Prompt files (read-only, generated by Codex) # - State files (managed by hooks, not Claude) -# - Goal tracker after Round 0 -# - PR loop state files (.humanize/pr-loop/) -# - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) +# - Wrong round number contract files +# - Goal tracker edits outside the active loop or that alter the immutable section # set -euo pipefail @@ -39,7 +38,7 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") # ======================================== if is_round_file_type "$FILE_PATH_LOWER" "todos"; then - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + PROJECT_ROOT="$(resolve_project_root)" || exit 0 LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") if [[ -z "$LOOP_DIR" ]] || ! is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; then @@ -54,29 +53,74 @@ if is_round_file_type "$FILE_PATH_LOWER" "prompt"; then fi # ======================================== -# PR Loop File Protection +# Methodology Analysis Phase Edit Restriction # ======================================== +# During methodology analysis, only methodology artifacts can be edited. 
+# This prevents source code modifications after Codex has signed off. +# This check MUST come before the humanize loop dir early exit below. -IN_PR_LOOP_DIR=$(is_in_pr_loop_dir "$FILE_PATH" && echo "true" || echo "false") - -if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then - # Block state.md edits in PR loop - if is_state_file_path "$FILE_PATH_LOWER"; then - pr_loop_state_blocked_message >&2 - exit 2 +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. +_MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi - - # Block read-only PR loop files - if is_pr_loop_readonly_file "$FILE_PATH_LOWER"; then - pr_loop_prompt_blocked_message >&2 - exit 2 + _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + # Ensure paths are absolute and reject ".." to prevent traversal bypasses. + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Edit Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." 
>&2 + exit 2 + fi + # Fail closed if the leaf is a symlink we cannot resolve; the raw + # path would satisfy the loop-dir prefix check while pointing at a + # target outside the loop, letting the basename allowlist approve + # edits to arbitrary files during methodology-analysis mode. + if [[ -L "$FILE_PATH" ]]; then + echo "# Edit Blocked During Methodology Analysis + +Path is a symlink that cannot be resolved without realpath." >&2 + exit 2 + fi + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi fi - - # For round-N-pr-resolve.md (Claude's resolution summary), validate round number - if is_pr_round_file_type "$FILE_PATH_LOWER" "pr-resolve"; then - validate_pr_resolve_round "$FILE_PATH_LOWER" "edit" || exit $? - exit 0 + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_LOOP_DIR" == /* ]]; then + _ma_real_loop="$_MA_LOOP_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_LOOP_DIR" + fi + fi + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md) + exit 0 + ;; + esac fi + echo "# Edit Blocked During Methodology Analysis + +During the methodology analysis phase, only methodology artifacts can be edited. 
+Allowed: methodology-analysis-report.md, methodology-analysis-done.md" >&2 + exit 2 fi # ======================================== @@ -91,7 +135,8 @@ fi # Find Active Loop and Current Round # ======================================== -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" @@ -101,6 +146,10 @@ fi # Detect if we're in Finalize Phase (finalize-state.md exists) STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +IS_FINALIZE_PHASE=false +if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then + IS_FINALIZE_PHASE=true +fi # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then @@ -110,9 +159,14 @@ fi CURRENT_ROUND="$STATE_CURRENT_ROUND" # ======================================== -# Block State File Edits (state.md and finalize-state.md) +# Block State File Edits (state.md, finalize-state.md, methodology-analysis-state.md) # ======================================== -# NOTE: Check finalize-state.md FIRST because is_state_file_path also matches finalize-state.md +# NOTE: Check most specific patterns first because is_state_file_path matches any *state.md + +if is_methodology_analysis_state_file_path "$FILE_PATH_LOWER"; then + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi if is_finalize_state_file_path "$FILE_PATH_LOWER"; then finalize_state_file_blocked_message >&2 @@ -124,6 +178,11 @@ if is_state_file_path "$FILE_PATH_LOWER"; then exit 2 fi +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then + finalize_contract_blocked_message "edit" >&2 + exit 2 +fi + # ======================================== # Block Plan Backup 
Edits # ======================================== @@ -139,20 +198,52 @@ if [[ "$FILENAME" == "plan.md" ]]; then fi # ======================================== -# Block Goal Tracker After Round 0 +# Validate Goal Tracker Edits # ======================================== -if is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$CURRENT_ROUND" -gt 0 ]]; then - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 - exit 2 +if is_goal_tracker_path "$FILE_PATH_LOWER"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if [[ "$CURRENT_ROUND" -gt 0 ]]; then + if ! echo "$HOOK_INPUT" | jq -e '.tool_input | has("old_string") and has("new_string")' >/dev/null 2>&1; then + echo "Error: Missing required field: tool_input.old_string or tool_input.new_string" >&2 + exit 1 + fi + OLD_STRING=$(echo "$HOOK_INPUT" | jq -r '.tool_input.old_string // ""') + if [[ -z "$OLD_STRING" ]]; then + echo "Error: Missing required field: tool_input.old_string" >&2 + exit 1 + fi + + NEW_STRING=$(echo "$HOOK_INPUT" | jq -r '.tool_input.new_string // ""') + REPLACE_ALL=$(echo "$HOOK_INPUT" | jq -r '.tool_input.replace_all // false') + + if ! UPDATED_CONTENT=$(preview_edit_result "$GOAL_TRACKER_PATH" "$OLD_STRING" "$NEW_STRING" "$REPLACE_ALL" 2>/dev/null); then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if ! 
goal_tracker_mutable_update_allowed "$GOAL_TRACKER_PATH" "$UPDATED_CONTENT"; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + fi + + exit 0 fi # ======================================== -# Validate Summary File Round Number +# Validate Summary/Contract File Round Number # ======================================== -if is_round_file_type "$FILE_PATH_LOWER" "summary"; then +if is_round_file_type "$FILE_PATH_LOWER" "summary" || is_round_file_type "$FILE_PATH_LOWER" "contract"; then # Extract filename from path (portable - works in bash and zsh) CLAUDE_FILENAME=$(echo "$FILE_PATH" | sed -n 's|.*\.humanize/rlcr/[^/]*/\(.*\)$|\1|p') if [[ -z "$CLAUDE_FILENAME" ]]; then @@ -161,9 +252,10 @@ if is_round_file_type "$FILE_PATH_LOWER" "summary"; then if [[ -n "$CLAUDE_FILENAME" ]]; then CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME") + FILE_TYPE=$([[ "$FILE_PATH_LOWER" == *"-contract.md" ]] && echo "contract" || echo "summary") if [[ -n "$CLAUDE_ROUND" ]] && [[ "$CLAUDE_ROUND" != "$CURRENT_ROUND" ]] && ! is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md" FALLBACK="# Wrong Round Number You tried to {{ACTION}} round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**. 
@@ -172,7 +264,7 @@ Edit: {{CORRECT_PATH}}" load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-number.md" "$FALLBACK" \ "ACTION=edit" \ "CLAUDE_ROUND=$CLAUDE_ROUND" \ - "FILE_TYPE=summary" \ + "FILE_TYPE=$FILE_TYPE" \ "CURRENT_ROUND=$CURRENT_ROUND" \ "CORRECT_PATH=$CORRECT_PATH" >&2 exit 2 diff --git a/hooks/loop-plan-file-validator.sh b/hooks/loop-plan-file-validator.sh index 595d408a..a5ce8c9f 100755 --- a/hooks/loop-plan-file-validator.sh +++ b/hooks/loop-plan-file-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # UserPromptSubmit hook for plan file validation during RLCR loop # @@ -11,11 +11,12 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" # Source shared loop functions and template loader source "$SCRIPT_DIR/lib/loop-common.sh" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 + # Source portable timeout wrapper for git operations PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" source "$PLUGIN_ROOT/scripts/portable-timeout.sh" diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index 22d80a11..020fa877 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PostToolUse Bash Hook for RLCR loop # @@ -26,8 +26,13 @@ set -euo pipefail # Read hook JSON input from stdin HOOK_INPUT=$(cat) -# Determine project root -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +# Determine project root using the shared deterministic resolver. +# If neither CLAUDE_PROJECT_DIR nor a git toplevel is available, there +# is no active loop to patch - exit cleanly (pwd is NOT used as a +# fallback because it drifts with `cd` during a session). 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +source "$SCRIPT_DIR/lib/project-root.sh" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 # Check for pending session_id signal file SIGNAL_FILE="$PROJECT_ROOT/.humanize/.pending-session-id" diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..b812288a 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -1,15 +1,13 @@ -#!/bin/bash +#!/usr/bin/env bash # -# PreToolUse Hook: Validate Read access for RLCR loop and PR loop files +# PreToolUse Hook: Validate Read access for RLCR loop files # # Blocks Claude from reading: -# - Wrong round's prompt/summary files (outdated information) +# - Wrong round's prompt/summary/contract files (outdated information) # - Round files from wrong locations (not in .humanize/rlcr/) # - Round files from old session directories # - Todos files (should use native Task tools instead) -# -# PR loop files (.humanize/pr-loop/) are generally allowed to read -# to give Claude access to comments, prompts, and feedback. +# - goal-tracker.md from old RLCR sessions # set -euo pipefail @@ -56,7 +54,7 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") # ======================================== if is_round_file_type "$FILE_PATH_LOWER" "todos"; then - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + PROJECT_ROOT="$(resolve_project_root)" || exit 0 LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") if [[ -z "$LOOP_DIR" ]] || ! is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; then @@ -66,30 +64,149 @@ if is_round_file_type "$FILE_PATH_LOWER" "todos"; then fi # ======================================== -# Check for Round Files (summary/prompt) +# Methodology Analysis Phase Read Restriction +# ======================================== +# During methodology analysis, restrict reads of files within the loop +# directory to only the artifacts the analysis agent needs. 
This prevents +# project-specific information from leaking into the analysis report. +# Files outside the loop directory are allowed (Claude needs system files). +# This check MUST come before the summary/prompt early exit below, +# otherwise non-summary/prompt files in the loop dir escape restriction. + +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. +ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +_MA_CHECK_DIR="$ACTIVE_LOOP_DIR" + +if [[ -n "$_MA_CHECK_DIR" ]]; then + _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") + if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then + # Canonicalize to prevent path traversal + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi + _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + # Ensure paths are absolute so prefix guards cannot be bypassed. + # Reject paths with ".." segments to prevent traversal bypasses + # when we cannot canonicalize (fail closed). + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Read Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." 
>&2 + exit 2 + fi + # Fail closed if the file is a symlink we cannot resolve; the raw + # path would skip the project-root prefix guard, allowing a symlink + # outside the project to point back at restricted project content. + if [[ -L "$FILE_PATH" ]]; then + echo "# Read Blocked During Methodology Analysis + +Path is a symlink that cannot be resolved without realpath." >&2 + exit 2 + fi + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_CHECK_DIR" == /* ]]; then + _ma_real_loop="$_MA_CHECK_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_CHECK_DIR" + fi + fi + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + # Allowlist: only methodology artifacts (not raw development records). + # Raw records (round-*-summary.md, round-*-review-result.md) are + # intentionally excluded so the originating session cannot read + # project-specific content and must rely solely on the sanitized + # methodology-analysis-report.md for all user-facing output. + # The spawned Opus agent reads raw records directly (not restricted + # by hooks due to different session_id -- see limitation comment above). + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + exit 0 + ;; + *) + echo "# Read Blocked During Methodology Analysis + +Only methodology artifacts can be read from the loop directory during this phase. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2 + exit 2 + ;; + esac + fi + # Files within the project root are blocked (project-specific information) + # Files outside the project root are allowed (system files, config, etc.) 
+ _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "$PROJECT_ROOT") + if [[ -n "$_ma_project_real" ]]; then + _ma_path_check="${_ma_real_path:-$FILE_PATH}" + if [[ "$_ma_path_check" == "$_ma_project_real/"* ]] || \ + [[ "$_ma_path_check" == "$PROJECT_ROOT/"* ]]; then + echo "# Read Blocked During Methodology Analysis + +Reading project files is not allowed during the methodology analysis phase. +Only methodology artifacts within the loop directory can be read. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2 + exit 2 + fi + fi + exit 0 + fi +fi + +# ======================================== +# Check for Restricted RLCR Files # ======================================== -if ! is_round_file_type "$FILE_PATH_LOWER" "summary" && ! is_round_file_type "$FILE_PATH_LOWER" "prompt"; then +IS_GOAL_TRACKER=$(is_goal_tracker_path "$FILE_PATH_LOWER" && echo "true" || echo "false") +IS_ROUND_FILE=$( + if is_round_file_type "$FILE_PATH_LOWER" "summary" || \ + is_round_file_type "$FILE_PATH_LOWER" "prompt" || \ + is_round_file_type "$FILE_PATH_LOWER" "contract"; then + echo "true" + else + echo "false" + fi +) + +IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") +if [[ "$IS_ROUND_FILE" != "true" ]] && ! 
{ [[ "$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; }; then exit 0 fi CLAUDE_FILENAME=$(basename "$FILE_PATH") -IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") # ======================================== # Find Active Loop and Current Round # ======================================== -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" -LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" -ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Re-use ACTIVE_LOOP_DIR if already set by methodology analysis check above +ACTIVE_LOOP_DIR="${ACTIVE_LOOP_DIR:-${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}}" if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 fi -# Detect if we're in Finalize Phase (finalize-state.md exists) +# Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +IS_FINALIZE_PHASE=false +if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then + IS_FINALIZE_PHASE=true +fi # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then @@ -98,6 +215,35 @@ if ! 
parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then + finalize_contract_blocked_message "read" >&2 + exit 2 +fi + +# ======================================== +# Validate Goal Tracker Path +# ======================================== + +if [[ "$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_CORRECT_PATH=$(_normalize_path "$CORRECT_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_CORRECT_PATH" ]]; then + FALLBACK="# Wrong Goal Tracker Path + +Read the active loop goal tracker instead: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-file-location.md" "$FALLBACK" \ + "FILE_PATH=$FILE_PATH" \ + "ACTIVE_LOOP_DIR=$ACTIVE_LOOP_DIR" \ + "CURRENT_ROUND=$CURRENT_ROUND" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 + fi + + exit 0 +fi + # ======================================== # Extract Round Number and File Type # ======================================== @@ -113,6 +259,8 @@ if is_round_file_type "$FILE_PATH_LOWER" "summary"; then FILE_TYPE="summary" elif is_round_file_type "$FILE_PATH_LOWER" "prompt"; then FILE_TYPE="prompt" +elif is_round_file_type "$FILE_PATH_LOWER" "contract"; then + FILE_TYPE="contract" fi # ======================================== @@ -156,7 +304,13 @@ fi CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" -if [[ "$FILE_PATH" != "$CORRECT_PATH" ]]; then +# Compare prefix-canonical forms -- see loop-write-validator.sh for the +# rationale; the same reasoning applies to read paths. A planted symlink +# at the leaf would otherwise let a Read follow the link outside the loop +# dir and still pass this validator. 
+_READ_FILE_REAL=$(canonicalize_path_prefix "$FILE_PATH") +_READ_CORRECT_REAL=$(canonicalize_path_prefix "$CORRECT_PATH") +if [[ "${_READ_FILE_REAL:-$FILE_PATH}" != "${_READ_CORRECT_REAL:-$CORRECT_PATH}" ]]; then FALLBACK="# Wrong Directory Path You tried to {{ACTION}} {{FILE_PATH}} but the correct path is {{CORRECT_PATH}}" diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..1d8f1e31 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -1,15 +1,14 @@ -#!/bin/bash +#!/usr/bin/env bash # -# PreToolUse Hook: Validate Write paths for RLCR loop and PR loop +# PreToolUse Hook: Validate Write paths for RLCR loop # # Blocks Claude from writing to: # - Todos files (should use native Task tools instead) # - Prompt files (read-only, generated by Codex) # - Wrong round number summary files +# - Wrong round number contract files # - Summary files outside .humanize/rlcr/ -# - Goal tracker after Round 0 -# - PR loop state files (.humanize/pr-loop/) -# - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) +# - Goal tracker writes outside the active loop or that alter the immutable section # set -euo pipefail @@ -56,7 +55,7 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") # ======================================== if is_round_file_type "$FILE_PATH_LOWER" "todos"; then - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + PROJECT_ROOT="$(resolve_project_root)" || exit 0 LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") if [[ -z "$LOOP_DIR" ]] || ! is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; then @@ -71,29 +70,76 @@ if is_round_file_type "$FILE_PATH_LOWER" "prompt"; then fi # ======================================== -# PR Loop File Protection +# Methodology Analysis Phase Write Restriction # ======================================== +# During methodology analysis, only methodology artifacts can be written. 
+# This prevents source code modifications after Codex has signed off. +# This check MUST come before the file type early exits below. -IN_PR_LOOP_DIR=$(is_in_pr_loop_dir "$FILE_PATH" && echo "true" || echo "false") - -if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then - # Block state.md writes in PR loop - if is_state_file_path "$FILE_PATH_LOWER"; then - pr_loop_state_blocked_message >&2 - exit 2 +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. +_MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi - - # Block read-only PR loop files - if is_pr_loop_readonly_file "$FILE_PATH_LOWER"; then - pr_loop_prompt_blocked_message >&2 - exit 2 + _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + # Ensure paths are absolute so prefix guards cannot be bypassed. + # Reject paths with ".." segments to prevent traversal bypasses + # when we cannot canonicalize (fail closed). 
+ if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Write Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." >&2 + exit 2 + fi + # Fail closed if the leaf is a symlink we cannot resolve; the raw + # path would satisfy the loop-dir prefix check while pointing at a + # target outside the loop, letting the basename allowlist approve + # writes to arbitrary files during methodology-analysis mode. + if [[ -L "$FILE_PATH" ]]; then + echo "# Write Blocked During Methodology Analysis + +Path is a symlink that cannot be resolved without realpath." >&2 + exit 2 + fi + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi fi - - # For round-N-pr-resolve.md (Claude's resolution summary), validate round number - if is_pr_round_file_type "$FILE_PATH_LOWER" "pr-resolve"; then - validate_pr_resolve_round "$FILE_PATH_LOWER" "write to" || exit $? - exit 0 + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_LOOP_DIR" == /* ]]; then + _ma_real_loop="$_MA_LOOP_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_LOOP_DIR" + fi + fi + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md) + exit 0 + ;; + esac fi + echo "# Write Blocked During Methodology Analysis + +During the methodology analysis phase, only methodology artifacts can be written. 
+Allowed: methodology-analysis-report.md, methodology-analysis-done.md" >&2 + exit 2 fi # ======================================== @@ -101,20 +147,21 @@ fi # ======================================== IS_SUMMARY_FILE=$(is_round_file_type "$FILE_PATH_LOWER" "summary" && echo "true" || echo "false") +IS_CONTRACT_FILE=$(is_round_file_type "$FILE_PATH_LOWER" "contract" && echo "true" || echo "false") IS_FINALIZE_SUMMARY=$(is_finalize_summary_path "$FILE_PATH_LOWER" && echo "true" || echo "false") IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") -# If not a summary file, not a finalize summary, and not in .humanize/rlcr, allow normally -if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then +# If not a summary file, not a contract file, not a finalize summary, and not in .humanize/rlcr, allow normally +if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_CONTRACT_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then exit 0 fi -# For state.md, finalize-state.md, goal-tracker.md, and plan.md in .humanize/rlcr, we need further validation -# For other files in .humanize/rlcr that aren't summaries, allow them +# For state.md, finalize-state.md, methodology-analysis-state.md, goal-tracker.md, and plan.md in .humanize/rlcr, we need further validation +# For other files in .humanize/rlcr that aren't summaries/contracts, allow them FILENAME=$(basename "$FILE_PATH") IS_PLAN_BACKUP=$([[ "$FILENAME" == "plan.md" ]] && echo "true" || echo "false") -if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then - if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! 
is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then +if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_CONTRACT_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then + if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_methodology_analysis_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then exit 0 fi fi @@ -124,7 +171,8 @@ fi # ======================================== # Re-initialize if not set by earlier todos check -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" @@ -147,9 +195,14 @@ fi CURRENT_ROUND="$STATE_CURRENT_ROUND" # ======================================== -# Block State File Writes (state.md and finalize-state.md) +# Block State File Writes (state.md, finalize-state.md, methodology-analysis-state.md) # ======================================== -# NOTE: Check finalize-state.md FIRST because is_state_file_path also matches finalize-state.md +# NOTE: Check most specific patterns first because is_state_file_path matches any *state.md + +if is_methodology_analysis_state_file_path "$FILE_PATH_LOWER"; then + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi if is_finalize_state_file_path "$FILE_PATH_LOWER"; then finalize_state_file_blocked_message >&2 @@ -174,6 +227,12 @@ if [[ "$IS_FINALIZE_SUMMARY" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true fi fi +# There is no active round contract once the loop has entered Finalize Phase. 
+if [[ "$IS_FINALIZE_PHASE" == "true" ]] && [[ "$IS_CONTRACT_FILE" == "true" ]]; then + finalize_contract_blocked_message "write to" >&2 + exit 2 +fi + # ======================================== # Block Plan Backup Writes # ======================================== @@ -188,26 +247,54 @@ if [[ "$IS_PLAN_BACKUP" == "true" ]]; then fi # ======================================== -# Block Goal Tracker After Round 0 +# Validate Goal Tracker Writes # ======================================== -if is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$CURRENT_ROUND" -gt 0 ]]; then - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 - exit 2 +if is_goal_tracker_path "$FILE_PATH_LOWER"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if [[ "$CURRENT_ROUND" -gt 0 ]]; then + if ! require_tool_input_field "$HOOK_INPUT" "content"; then + exit 1 + fi + + UPDATED_CONTENT=$(echo "$HOOK_INPUT" | jq -r '.tool_input.content // ""') + if ! 
goal_tracker_mutable_update_allowed "$GOAL_TRACKER_PATH" "$UPDATED_CONTENT"; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + fi + + exit 0 fi # ======================================== -# Block Summary Files Outside .humanize/rlcr +# Block Summary/Contract Files Outside .humanize/rlcr # ======================================== -if [[ "$IS_SUMMARY_FILE" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - FALLBACK="# Wrong Summary Location +if [[ "$IS_SUMMARY_FILE" == "true" || "$IS_CONTRACT_FILE" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then + if [[ "$IS_CONTRACT_FILE" == "true" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-contract.md" + FALLBACK="# Wrong Round Contract Location + +Write the round contract to the correct path: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-contract-location.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + else + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + FALLBACK="# Wrong Summary Location Write summary to the correct path: {{CORRECT_PATH}}" - load_and_render_safe "$TEMPLATE_DIR" "block/wrong-summary-location.md" "$FALLBACK" \ - "CORRECT_PATH=$CORRECT_PATH" >&2 + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-summary-location.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + fi exit 2 fi @@ -224,14 +311,15 @@ if [[ -z "$CLAUDE_FILENAME" ]]; then fi # ======================================== -# Validate Round Number (for summary files) +# Validate Round Number (for summary/contract files) # ======================================== -if [[ "$IS_SUMMARY_FILE" == "true" ]]; then +if [[ "$IS_SUMMARY_FILE" == "true" || "$IS_CONTRACT_FILE" == "true" ]]; then CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME") + FILE_TYPE=$([[ "$IS_CONTRACT_FILE" == "true" ]] && echo "contract" || echo "summary") if [[ -n 
"$CLAUDE_ROUND" ]] && [[ "$CLAUDE_ROUND" != "$CURRENT_ROUND" ]] && ! is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md" FALLBACK="# Wrong Round Number You tried to {{ACTION}} round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**. @@ -240,7 +328,7 @@ Write to: {{CORRECT_PATH}}" load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-number.md" "$FALLBACK" \ "ACTION=write to" \ "CLAUDE_ROUND=$CLAUDE_ROUND" \ - "FILE_TYPE=summary" \ + "FILE_TYPE=$FILE_TYPE" \ "CURRENT_ROUND=$CURRENT_ROUND" \ "CORRECT_PATH=$CORRECT_PATH" >&2 exit 2 @@ -253,7 +341,16 @@ fi CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" -if [[ "$FILE_PATH" != "$CORRECT_PATH" ]]; then +# Compare prefix-canonical forms so the check is not fooled by equivalent +# paths expressed in different ancestor forms (e.g. /var/... vs /private/var/... +# on macOS) -- without dereferencing the leaf. Using full realpath here +# would let a planted symlink at <loop>/<CLAUDE_FILENAME> pointing outside +# the loop dir approve a write through the link, escalating Claude's write +# reach beyond the loop dir. canonicalize_path_prefix resolves the parent +# directory only; the basename is compared verbatim. 
+_WRITE_FILE_REAL=$(canonicalize_path_prefix "$FILE_PATH") +_WRITE_CORRECT_REAL=$(canonicalize_path_prefix "$CORRECT_PATH") +if [[ "${_WRITE_FILE_REAL:-$FILE_PATH}" != "${_WRITE_CORRECT_REAL:-$CORRECT_PATH}" ]]; then FALLBACK="# Wrong Directory Path You tried to {{ACTION}} {{FILE_PATH}} but the correct path is {{CORRECT_PATH}}" diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh deleted file mode 100755 index f02710e2..00000000 --- a/hooks/pr-loop-stop-hook.sh +++ /dev/null @@ -1,1646 +0,0 @@ -#!/bin/bash -# -# Stop Hook for PR loop -# -# Intercepts Claude's exit attempts, polls for remote bot reviews, -# and uses local Codex to validate if bot concerns are addressed. -# -# Key features: -# - Polls until ALL active bots respond (per-bot tracking with 15min timeout each) -# - Checks PR state before polling (detects CLOSED/MERGED) -# - Uses APPROVE marker for Codex approval -# - Updates active_bots list based on per-bot approval -# -# State directory: .humanize/pr-loop/<timestamp>/ -# State file: state.md (current_round, pr_number, active_bots as YAML list, etc.) -# Resolve file: round-N-pr-resolve.md (Claude's resolution summary) -# Comment file: round-N-pr-comment.md (Fetched PR comments) -# Check file: round-N-pr-check.md (Local Codex validation) -# Feedback file: round-N-pr-feedback.md (Feedback for next round) -# - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Override effort before sourcing loop-common.sh (PR loop defaults to medium effort). -# codex_model is NOT pre-set here so that config-backed values from loop-common.sh apply. 
-DEFAULT_CODEX_EFFORT="medium" -DEFAULT_CODEX_TIMEOUT=900 -DEFAULT_POLL_INTERVAL=30 -DEFAULT_POLL_TIMEOUT=900 # 15 minutes per bot - -# Note: Bot name mapping functions (map_bot_to_author, map_author_to_bot) -# and helper functions (build_yaml_list, build_bot_mention_string) are -# provided by loop-common.sh which is sourced below. - -# ======================================== -# Read Hook Input -# ======================================== - -HOOK_INPUT=$(cat) - -# ======================================== -# Find Active Loop -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Source shared loop functions -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/lib/loop-common.sh" - -# Source portable timeout wrapper -PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -TEMPLATE_DIR="$PLUGIN_ROOT/prompt-template" -source "$PLUGIN_ROOT/scripts/portable-timeout.sh" - -# Default timeout for git/gh operations -GIT_TIMEOUT=30 -GH_TIMEOUT=60 - -# Use shared find_active_pr_loop function from loop-common.sh -LOOP_DIR=$(find_active_pr_loop "$LOOP_BASE_DIR") - -# If no active PR loop, let other hooks handle -if [[ -z "$LOOP_DIR" ]]; then - exit 0 -fi - -STATE_FILE="$LOOP_DIR/state.md" - -if [[ ! 
-f "$STATE_FILE" ]]; then - exit 0 -fi - -# ======================================== -# Parse State File (YAML list format for active_bots) -# ======================================== - -# Declare arrays outside function for macOS Bash 3.2 compatibility -# (declare -g requires Bash 4.2+, which macOS doesn't have by default) -PR_CONFIGURED_BOTS_ARRAY=() -PR_ACTIVE_BOTS_ARRAY=() - -parse_pr_loop_state() { - local state_file="$1" - - STATE_FRONTMATTER=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$state_file" 2>/dev/null || echo "") - - PR_CURRENT_ROUND=$(echo "$STATE_FRONTMATTER" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ' || true) - PR_MAX_ITERATIONS=$(echo "$STATE_FRONTMATTER" | grep "^max_iterations:" | sed "s/max_iterations: *//" | tr -d ' ' || true) - PR_NUMBER=$(echo "$STATE_FRONTMATTER" | grep "^pr_number:" | sed "s/pr_number: *//" | tr -d ' ' || true) - PR_START_BRANCH=$(echo "$STATE_FRONTMATTER" | grep "^start_branch:" | sed "s/start_branch: *//; s/^\"//; s/\"\$//" || true) - PR_CODEX_MODEL=$(echo "$STATE_FRONTMATTER" | grep "^codex_model:" | sed "s/codex_model: *//" | tr -d ' ' || true) - PR_CODEX_EFFORT=$(echo "$STATE_FRONTMATTER" | grep "^codex_effort:" | sed "s/codex_effort: *//" | tr -d ' ' || true) - PR_CODEX_TIMEOUT=$(echo "$STATE_FRONTMATTER" | grep "^codex_timeout:" | sed "s/codex_timeout: *//" | tr -d ' ' || true) - PR_POLL_INTERVAL=$(echo "$STATE_FRONTMATTER" | grep "^poll_interval:" | sed "s/poll_interval: *//" | tr -d ' ' || true) - PR_POLL_TIMEOUT=$(echo "$STATE_FRONTMATTER" | grep "^poll_timeout:" | sed "s/poll_timeout: *//" | tr -d ' ' || true) - PR_STARTED_AT=$(echo "$STATE_FRONTMATTER" | grep "^started_at:" | sed "s/started_at: *//" || true) - PR_LAST_TRIGGER_AT=$(echo "$STATE_FRONTMATTER" | grep "^last_trigger_at:" | sed "s/last_trigger_at: *//" || true) - - # New state fields for Cases 1-5 and force push detection - PR_STARTUP_CASE=$(echo "$STATE_FRONTMATTER" | grep "^startup_case:" | sed "s/startup_case: *//" | tr -d 
' ' || true) - PR_LATEST_COMMIT_SHA=$(echo "$STATE_FRONTMATTER" | grep "^latest_commit_sha:" | sed "s/latest_commit_sha: *//" | tr -d ' ' || true) - PR_LATEST_COMMIT_AT=$(echo "$STATE_FRONTMATTER" | grep "^latest_commit_at:" | sed "s/latest_commit_at: *//" || true) - PR_TRIGGER_COMMENT_ID=$(echo "$STATE_FRONTMATTER" | grep "^trigger_comment_id:" | sed "s/trigger_comment_id: *//" | tr -d ' ' || true) - - # Parse configured_bots and active_bots as YAML lists - # configured_bots: never changes, used for polling all bots (allows re-add) - # active_bots: current bots with issues, shrinks as bots approve - # Arrays are declared outside function for macOS Bash 3.2 compatibility - PR_CONFIGURED_BOTS_ARRAY=() - PR_ACTIVE_BOTS_ARRAY=() - - # Parse YAML list helper function - # NOTE: Avoids 'local -n' (nameref) which requires Bash 4.3+ and fails on macOS Bash 3.2 - # Instead, outputs values to stdout and caller captures into array - parse_yaml_list() { - local field_name="$1" - local in_field=false - - while IFS= read -r line; do - if [[ "$line" =~ ^${field_name}: ]]; then - in_field=true - # Check if it's inline format: field: value - local inline_value="${line#*: }" - if [[ -n "$inline_value" && "$inline_value" != "${field_name}:" ]]; then - # Old comma-separated format for backwards compatibility - echo "$inline_value" | tr ',' '\n' | tr -d ' ' - in_field=false - fi - continue - fi - if [[ "$in_field" == "true" ]]; then - if [[ "$line" =~ ^[[:space:]]+-[[:space:]]+ ]]; then - # Extract bot name from " - botname" - local bot_name="${line#*- }" - bot_name=$(echo "$bot_name" | tr -d ' ') - if [[ -n "$bot_name" ]]; then - echo "$bot_name" - fi - elif [[ "$line" =~ ^[a-zA-Z_] ]]; then - # New field started, stop parsing - in_field=false - fi - fi - done <<< "$STATE_FRONTMATTER" - } - - # Read parsed values into arrays (macOS Bash 3.2 compatible) - while IFS= read -r bot; do - [[ -n "$bot" ]] && PR_CONFIGURED_BOTS_ARRAY+=("$bot") - done < <(parse_yaml_list "configured_bots") - - 
while IFS= read -r bot; do - [[ -n "$bot" ]] && PR_ACTIVE_BOTS_ARRAY+=("$bot") - done < <(parse_yaml_list "active_bots") - - # Backwards compatibility: if configured_bots is empty, use active_bots - if [[ ${#PR_CONFIGURED_BOTS_ARRAY[@]} -eq 0 ]]; then - PR_CONFIGURED_BOTS_ARRAY=("${PR_ACTIVE_BOTS_ARRAY[@]}") - fi - - # Apply defaults - PR_CURRENT_ROUND="${PR_CURRENT_ROUND:-0}" - PR_MAX_ITERATIONS="${PR_MAX_ITERATIONS:-42}" - PR_CODEX_MODEL="${PR_CODEX_MODEL:-$DEFAULT_CODEX_MODEL}" - PR_CODEX_EFFORT="${PR_CODEX_EFFORT:-$DEFAULT_CODEX_EFFORT}" - PR_CODEX_TIMEOUT="${PR_CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}" - PR_POLL_INTERVAL="${PR_POLL_INTERVAL:-$DEFAULT_POLL_INTERVAL}" - PR_POLL_TIMEOUT="${PR_POLL_TIMEOUT:-$DEFAULT_POLL_TIMEOUT}" -} - -parse_pr_loop_state "$STATE_FILE" - -# Build display string and mention string from active bots array -PR_ACTIVE_BOTS_DISPLAY=$(IFS=', '; echo "${PR_ACTIVE_BOTS_ARRAY[*]}") -PR_CONFIGURED_BOTS_DISPLAY=$(IFS=', '; echo "${PR_CONFIGURED_BOTS_ARRAY[*]}") - -# Build mention string from configured bots (for detecting trigger comments) -PR_BOT_MENTION_STRING=$(build_bot_mention_string "${PR_CONFIGURED_BOTS_ARRAY[@]}") - -# Validate required fields -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number not found in state file" >&2 - exit 0 -fi - -if [[ ! "$PR_CURRENT_ROUND" =~ ^[0-9]+$ ]]; then - echo "Warning: Invalid current_round in state file" >&2 - exit 0 -fi - -# ======================================== -# Resolve PR Base Repository (for fork PRs) -# ======================================== -# IMPORTANT: For fork PRs, comments are on the base repository, not the fork. -# gh pr view without --repo fails in forks because the PR number doesn't exist there. -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks. -# NOTE: This MUST be done BEFORE PR state checks, which also need --repo for forks. 
- -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -PR_LOOKUP_REPO="" # Repo where PR was found (for subsequent lookups) - -if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - PR_LOOKUP_REPO="$CURRENT_REPO" - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - PR_LOOKUP_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Warning: Could not resolve PR base repository, using current repo" >&2 - PR_BASE_REPO="$CURRENT_REPO" - PR_LOOKUP_REPO="$CURRENT_REPO" -fi - -# ======================================== -# Check PR State (detect CLOSED/MERGED before polling) -# ======================================== -# NOTE: Uses PR_LOOKUP_REPO (resolved above) for fork PR support - -PR_STATE=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json state -q .state 2>/dev/null) || PR_STATE="" - -if [[ "$PR_STATE" == "MERGED" ]]; then - echo "PR #$PR_NUMBER has been merged. Marking loop as complete." 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/merged-state.md" - exit 0 -fi - -if [[ "$PR_STATE" == "CLOSED" ]]; then - echo "PR #$PR_NUMBER has been closed. Marking loop as closed." >&2 - mv "$STATE_FILE" "$LOOP_DIR/closed-state.md" - exit 0 -fi - -# ======================================== -# Check Resolution File Exists -# ======================================== - -RESOLVE_FILE="$LOOP_DIR/round-${PR_CURRENT_ROUND}-pr-resolve.md" - -if [[ ! -f "$RESOLVE_FILE" ]]; then - REASON="# Resolution Summary Missing - -Please write your resolution summary to: $RESOLVE_FILE - -The summary should include: -- Issues addressed -- Files modified -- Tests added (if any)" - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Resolution summary missing for round $PR_CURRENT_ROUND" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Check Git Status -# ======================================== - -if command -v git &>/dev/null && run_with_timeout "$GIT_TIMEOUT" git rev-parse --git-dir &>/dev/null 2>&1; then - GIT_STATUS_CACHED=$(run_with_timeout "$GIT_TIMEOUT" git status --porcelain 2>/dev/null) || GIT_EXIT=$? - GIT_EXIT=${GIT_EXIT:-0} - - if [[ $GIT_EXIT -ne 0 ]]; then - REASON="# Git Status Failed - -Git status operation failed. Please check your repository state and try again." - jq -n --arg reason "$REASON" --arg msg "PR Loop: Git status failed" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - - # Filter out .humanize from status check - NON_HUMANIZE_STATUS=$(echo "$GIT_STATUS_CACHED" | grep -v '\.humanize' || true) - - if [[ -n "$NON_HUMANIZE_STATUS" ]]; then - REASON="# Git Not Clean - -You have uncommitted changes. Please commit all changes before exiting. 
- -Changes detected: -\`\`\` -$NON_HUMANIZE_STATUS -\`\`\`" - jq -n --arg reason "$REASON" --arg msg "PR Loop: Uncommitted changes detected" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - - # Step 6: Check for unpushed commits (PR loop always requires push) - CURRENT_BRANCH=$(git branch --show-current 2>/dev/null || echo "main") - AHEAD_COUNT=0 - LOCAL_HEAD=$(git rev-parse HEAD 2>/dev/null) || LOCAL_HEAD="" - - # First try: git status -sb works when upstream is configured - GIT_AHEAD=$(run_with_timeout "$GIT_TIMEOUT" git status -sb 2>/dev/null | grep -o 'ahead [0-9]*' || true) - if [[ -n "$GIT_AHEAD" ]]; then - AHEAD_COUNT=$(echo "$GIT_AHEAD" | grep -o '[0-9]*') - else - # Fallback: Check if upstream exists, if not compare with origin/branch or PR head - if ! git rev-parse --abbrev-ref '@{u}' >/dev/null 2>&1; then - # No upstream configured - try origin/branch first - REMOTE_HEAD=$(git rev-parse "origin/$CURRENT_BRANCH" 2>/dev/null) || REMOTE_HEAD="" - if [[ -n "$LOCAL_HEAD" && -n "$REMOTE_HEAD" && "$LOCAL_HEAD" != "$REMOTE_HEAD" ]]; then - # Count commits ahead of remote - AHEAD_COUNT=$(git rev-list --count "origin/$CURRENT_BRANCH..HEAD" 2>/dev/null) || AHEAD_COUNT=0 - elif [[ -z "$REMOTE_HEAD" && -n "$PR_NUMBER" ]]; then - # No origin/branch exists - compare with PR's headRefOid from GitHub - # This handles cases where branch was never pushed or remote ref is missing - # NOTE: Use --repo for fork PR support (PR_BASE_REPO resolved earlier) - PR_HEAD_SHA=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_BASE_REPO" --json headRefOid -q '.headRefOid' 2>/dev/null) || PR_HEAD_SHA="" - if [[ -z "$PR_HEAD_SHA" ]]; then - # Failed to get PR head - fail closed (assume unpushed) for safety - echo "Warning: Could not fetch PR head SHA, assuming unpushed commits" >&2 - AHEAD_COUNT=1 - elif [[ -n "$LOCAL_HEAD" && "$LOCAL_HEAD" != "$PR_HEAD_SHA" ]]; then - # Local differs from PR head - count commits since PR head - 
AHEAD_COUNT=$(git rev-list --count "$PR_HEAD_SHA..HEAD" 2>/dev/null) || { - # PR head not in local history (force push?) - treat as 1 unpushed - AHEAD_COUNT=1 - } - fi - fi - fi - fi - - if [[ "$AHEAD_COUNT" -gt 0 ]]; then - FALLBACK_MSG="# Unpushed Commits Detected - -You have $AHEAD_COUNT unpushed commit(s). PR loop requires pushing changes so bots can review them. - -Please push: git push origin $CURRENT_BRANCH" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/unpushed-commits.md" "$FALLBACK_MSG" \ - "AHEAD_COUNT=$AHEAD_COUNT" "CURRENT_BRANCH=$CURRENT_BRANCH") - jq -n --arg reason "$REASON" --arg msg "PR Loop: $AHEAD_COUNT unpushed commit(s)" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi -fi - -# ======================================== -# Force Push Detection -# ======================================== - -# Detect if the remote branch HEAD has changed in a way that indicates force push -# This happens when previous commits are no longer reachable from current HEAD -if [[ -n "$PR_LATEST_COMMIT_SHA" ]]; then - CURRENT_HEAD=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD 2>/dev/null) || CURRENT_HEAD="" - - # Check if the stored commit SHA is still reachable from current HEAD - # If not, a force push (history rewrite) has occurred - if [[ -n "$CURRENT_HEAD" && "$CURRENT_HEAD" != "$PR_LATEST_COMMIT_SHA" ]]; then - # Check if old commit is ancestor of current HEAD - IS_ANCESTOR=$(run_with_timeout "$GIT_TIMEOUT" git merge-base --is-ancestor "$PR_LATEST_COMMIT_SHA" "$CURRENT_HEAD" 2>/dev/null && echo "yes" || echo "no") - - if [[ "$IS_ANCESTOR" == "no" ]]; then - echo "Force push detected: $PR_LATEST_COMMIT_SHA is no longer reachable from $CURRENT_HEAD" >&2 - - # Preserve OLD commit SHA before updating state - OLD_COMMIT_SHA="$PR_LATEST_COMMIT_SHA" - - # Get the timestamp of the new HEAD commit for trigger validation - # This ensures detect_trigger_comment only accepts comments AFTER the force push - # NOTE: Uses 
PR_LOOKUP_REPO for fork PR support - NEW_HEAD_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || NEW_HEAD_COMMIT_AT="" - - if [[ -z "$NEW_HEAD_COMMIT_AT" ]]; then - # Fallback: use current timestamp - NEW_HEAD_COMMIT_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - fi - - # Update state file with new commit SHA/timestamp and clear trigger state - # Clear BOTH last_trigger_at AND trigger_comment_id to prevent stale eyes checks - TEMP_FILE="${STATE_FILE}.forcepush.$$" - sed -e "s/^latest_commit_sha:.*/latest_commit_sha: $CURRENT_HEAD/" \ - -e "s/^latest_commit_at:.*/latest_commit_at: $NEW_HEAD_COMMIT_AT/" \ - -e "s/^last_trigger_at:.*/last_trigger_at:/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id:/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - # Update local variables to reflect the change - PR_LATEST_COMMIT_SHA="$CURRENT_HEAD" - PR_LATEST_COMMIT_AT="$NEW_HEAD_COMMIT_AT" - PR_LAST_TRIGGER_AT="" - PR_TRIGGER_COMMENT_ID="" - - FALLBACK_MSG="# Force Push Detected - -A force push (history rewrite) has been detected. Post a new @bot trigger comment: $PR_BOT_MENTION_STRING" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/force-push-detected.md" "$FALLBACK_MSG" \ - "OLD_COMMIT=$OLD_COMMIT_SHA" "NEW_COMMIT=$CURRENT_HEAD" "BOT_MENTION_STRING=$PR_BOT_MENTION_STRING" \ - "PR_NUMBER=$PR_NUMBER") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Force push detected - please re-trigger bots" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - fi -fi - -# ======================================== -# Check Max Iterations -# ======================================== - -NEXT_ROUND=$((PR_CURRENT_ROUND + 1)) - -if [[ $NEXT_ROUND -gt $PR_MAX_ITERATIONS ]]; then - echo "PR loop reached max iterations ($PR_MAX_ITERATIONS). Exiting." 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/maxiter-state.md" - exit 0 -fi - -# ======================================== -# Check if Active Bots Remain -# ======================================== -# NOTE: Step 8 (Codex +1 check) has been moved to after trigger detection -# to ensure it uses the correct timestamp that accounts for new commits. - -if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots have approved. PR loop complete!" >&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# ======================================== -# Detect Trigger Comment and Update last_trigger_at -# ======================================== - -# Get current GitHub user login for trigger comment filtering -get_current_user() { - run_with_timeout "$GH_TIMEOUT" gh api user --jq '.login' 2>/dev/null || echo "" -} - -# Find the most recent PR comment from CURRENT USER that contains bot mentions -# Returns: "timestamp|comment_id" on success -# This timestamp is used for --after filtering to catch fast bot replies -# NOTE: Uses --paginate to handle PRs with >30 comments -# IMPORTANT: If latest_commit_at is set, only accepts comments AFTER that timestamp -# This prevents old triggers from being re-used after force push -# IMPORTANT: Uses PR_BASE_REPO (not {owner}/{repo}) for fork PR support -detect_trigger_comment() { - local pr_num="$1" - local current_user="$2" - local after_timestamp="${3:-}" # Optional: only accept comments after this timestamp - - # Fetch ALL issue comments on the PR (paginated to handle >30 comments) - # Using --paginate ensures we don't miss the latest @mention on large PRs - # IMPORTANT: --jq with --paginate runs per-page, so we output objects (not array) - # and use jq -s to aggregate all pages into a single array before filtering - # IMPORTANT: Use PR_BASE_REPO for fork PRs - comments are on base repo, not fork - local comments_json - comments_json=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$pr_num/comments" \ - --paginate 
--jq '.[] | {id: .id, author: .user.login, created_at: .created_at, body: .body}' 2>/dev/null \ - | jq -s '.') || return 1 - - if [[ -z "$comments_json" || "$comments_json" == "[]" ]]; then - return 1 - fi - - # Build pattern to match any @bot mention - local bot_pattern="" - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ -n "$bot_pattern" ]]; then - bot_pattern="${bot_pattern}|@${bot}" - else - bot_pattern="@${bot}" - fi - done - - # Find most recent trigger comment from CURRENT USER (sorted by created_at descending) - # comments_json is already aggregated from all pages into a single array - # If after_timestamp is set, only accept comments created after that timestamp - # Returns both timestamp and comment ID - local trigger_info - if [[ -n "$after_timestamp" ]]; then - # Filter to only comments AFTER the specified timestamp (force push protection) - trigger_info=$(echo "$comments_json" | jq -r \ - --arg pattern "$bot_pattern" \ - --arg user "$current_user" \ - --arg after "$after_timestamp" ' - [.[] | select( - .author == $user and - (.body | test($pattern; "i")) and - (.created_at >= $after) - )] | - sort_by(.created_at) | reverse | .[0] | "\(.created_at)|\(.id)" // empty - ') - else - trigger_info=$(echo "$comments_json" | jq -r --arg pattern "$bot_pattern" --arg user "$current_user" ' - [.[] | select(.author == $user and (.body | test($pattern; "i")))] | - sort_by(.created_at) | reverse | .[0] | "\(.created_at)|\(.id)" // empty - ') - fi - - if [[ -n "$trigger_info" && "$trigger_info" != "null|null" && "$trigger_info" != "|" ]]; then - echo "$trigger_info" - return 0 - fi - - return 1 -} - -# Get current user for trigger comment filtering -CURRENT_USER=$(get_current_user) -if [[ -z "$CURRENT_USER" ]]; then - echo "Warning: Could not determine current GitHub user" >&2 -fi - -# ======================================== -# Refresh latest_commit_at from PR Before Trigger Detection -# ======================================== -# Ensure trigger validation 
uses the CURRENT latest commit timestamp, -# not a stale value from state. This prevents old triggers from being accepted -# after new (non-force) commits are pushed. - -# NOTE: Uses PR_LOOKUP_REPO for fork PR support -CURRENT_LATEST_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || CURRENT_LATEST_COMMIT_AT="" - -# Track if new commits were detected (used to override REQUIRE_TRIGGER for cases 2/3) -NEW_COMMITS_DETECTED=false - -if [[ -n "$CURRENT_LATEST_COMMIT_AT" && "$CURRENT_LATEST_COMMIT_AT" != "$PR_LATEST_COMMIT_AT" ]]; then - echo "Updating latest_commit_at: $PR_LATEST_COMMIT_AT -> $CURRENT_LATEST_COMMIT_AT" >&2 - echo " Clearing stale trigger fields (new commits require new @bot mention)" >&2 - - # Persist to state file and clear trigger fields to prevent stale polling - # New commits mean old trigger is invalid - user must post new @bot comment - TEMP_FILE="${STATE_FILE}.commitrefresh.$$" - sed -e "s/^latest_commit_at:.*/latest_commit_at: $CURRENT_LATEST_COMMIT_AT/" \ - -e "s/^last_trigger_at:.*/last_trigger_at:/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id:/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - PR_LATEST_COMMIT_AT="$CURRENT_LATEST_COMMIT_AT" - PR_LAST_TRIGGER_AT="" - PR_TRIGGER_COMMENT_ID="" - NEW_COMMITS_DETECTED=true -fi - -# ALWAYS check for newer trigger comments and update last_trigger_at -# This ensures we use the most recent trigger, not a stale one -# IMPORTANT: Pass latest_commit_at to filter out old triggers (force push protection) -# After a force push, we need a NEW trigger comment, not one from before the push -echo "Detecting trigger comment timestamp from user '$CURRENT_USER'..." 
>&2 -if [[ -n "$PR_LATEST_COMMIT_AT" ]]; then - echo " (Filtering for comments after: $PR_LATEST_COMMIT_AT)" >&2 -fi -DETECTED_TRIGGER_INFO=$(detect_trigger_comment "$PR_NUMBER" "$CURRENT_USER" "$PR_LATEST_COMMIT_AT") || true -DETECTED_TRIGGER_AT="" -DETECTED_TRIGGER_COMMENT_ID="" - -if [[ -n "$DETECTED_TRIGGER_INFO" ]]; then - # Parse timestamp and comment ID from "timestamp|id" format - DETECTED_TRIGGER_AT="${DETECTED_TRIGGER_INFO%%|*}" - DETECTED_TRIGGER_COMMENT_ID="${DETECTED_TRIGGER_INFO##*|}" -fi - -if [[ -n "$DETECTED_TRIGGER_AT" ]]; then - # Check if detected trigger is newer than stored one - if [[ -z "$PR_LAST_TRIGGER_AT" ]] || [[ "$DETECTED_TRIGGER_AT" > "$PR_LAST_TRIGGER_AT" ]]; then - echo "Found trigger comment at: $DETECTED_TRIGGER_AT (ID: $DETECTED_TRIGGER_COMMENT_ID)" >&2 - if [[ -n "$PR_LAST_TRIGGER_AT" ]]; then - echo " (Updating from older trigger: $PR_LAST_TRIGGER_AT)" >&2 - fi - PR_LAST_TRIGGER_AT="$DETECTED_TRIGGER_AT" - PR_TRIGGER_COMMENT_ID="$DETECTED_TRIGGER_COMMENT_ID" - - # Persist to state file - TEMP_FILE="${STATE_FILE}.trigger.$$" - sed -e "s/^last_trigger_at:.*/last_trigger_at: $DETECTED_TRIGGER_AT/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id: $DETECTED_TRIGGER_COMMENT_ID/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - # Note: Claude eyes verification is done in the dedicated section below - # (after trigger detection) to ensure it runs on EVERY exit attempt - else - echo "Using existing trigger timestamp: $PR_LAST_TRIGGER_AT" >&2 - fi -fi - -# ======================================== -# Determine if Trigger is Required (needed for Claude eyes check below) -# ======================================== - -# Trigger requirement logic: -# - Round 0, startup_case 1: No trigger required (waiting for initial auto-reviews) -# - Round 0, startup_case 2/3: No trigger required (process existing comments) -# - Round 0, startup_case 4/5: Trigger required (new commits after reviews) -# - Round > 0: Always require 
trigger -# - NEW: If new commits detected during this poll, require trigger (overrides cases 2/3) - -REQUIRE_TRIGGER=false -if [[ "$PR_CURRENT_ROUND" -gt 0 ]]; then - # Subsequent rounds always require a trigger - REQUIRE_TRIGGER=true -elif [[ "$NEW_COMMITS_DETECTED" == "true" ]]; then - # New commits detected during this poll - require fresh trigger - # This overrides cases 2/3 to prevent reusing stale reviews - REQUIRE_TRIGGER=true -elif [[ "$PR_CURRENT_ROUND" -eq 0 ]]; then - case "${PR_STARTUP_CASE:-1}" in - 1|2|3) - # Case 1: No comments yet - wait for initial auto-reviews - # Case 2/3: Comments exist - process them without requiring new trigger - REQUIRE_TRIGGER=false - ;; - 4|5) - # Case 4/5: All commented but new commits pushed - require re-trigger - REQUIRE_TRIGGER=true - ;; - *) - # Unknown case, default to not requiring trigger - REQUIRE_TRIGGER=false - ;; - esac -fi - -# ======================================== -# Step 8: Check for Codex +1 Reaction (After Trigger Detection) -# ======================================== -# IMPORTANT: This check runs AFTER trigger detection to ensure: -# 1. We use the correct timestamp that accounts for new commits -# 2. If trigger is required but missing, we don't approve based on old +1 - -# Check for codex bot in active bots -CODEX_IN_ACTIVE=false -for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "codex" ]]; then - CODEX_IN_ACTIVE=true - break - fi -done - -if [[ "$CODEX_IN_ACTIVE" == "true" ]]; then - # Skip +1 check if trigger is required but not yet posted - # (User needs to post @codex comment first) - if [[ "$REQUIRE_TRIGGER" == "true" && -z "$PR_LAST_TRIGGER_AT" ]]; then - echo "Skipping Codex +1 check: trigger required but not yet posted" >&2 - else - echo "Round $PR_CURRENT_ROUND: Checking for Codex +1 reaction on PR..." 
>&2 - - # Determine the timestamp for filtering +1 reactions - # Use trigger timestamp if available, otherwise fall back to loop start time - CODEX_REACTION_AFTER="${PR_LAST_TRIGGER_AT:-$PR_STARTED_AT}" - echo " (Checking for +1 after: $CODEX_REACTION_AFTER)" >&2 - - # Check for +1 reaction from Codex - CODEX_REACTION=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup "$PR_NUMBER" --after "$CODEX_REACTION_AFTER" 2>/dev/null) || CODEX_REACTION="" - - if [[ -n "$CODEX_REACTION" && "$CODEX_REACTION" != "null" ]]; then - REACTION_AT=$(echo "$CODEX_REACTION" | jq -r '.created_at') - echo "Codex +1 detected at $REACTION_AT - removing codex from active_bots" >&2 - - # Remove only codex from active_bots, keep other bots - declare -a NEW_ACTIVE_BOTS_AFTER_THUMBSUP=() - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" != "codex" ]]; then - NEW_ACTIVE_BOTS_AFTER_THUMBSUP+=("$bot") - fi - done - - # If no other bots remain, loop is complete - if [[ ${#NEW_ACTIVE_BOTS_AFTER_THUMBSUP[@]} -eq 0 ]]; then - echo "Codex was the only active bot - PR loop approved!" 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 - fi - - # Update active_bots in state file and continue with other bots - echo "Continuing with remaining bots: ${NEW_ACTIVE_BOTS_AFTER_THUMBSUP[*]}" >&2 - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_AFTER_THUMBSUP[@]}") - - # Update state file - NEW_ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - - TEMP_FILE="${STATE_FILE}.thumbsup.$$" - # Replace active_bots section in state file - awk -v new_bots="$NEW_ACTIVE_BOTS_YAML" ' - /^active_bots:/ { - print "active_bots:" new_bots - in_bots=1 - next - } - in_bots && /^[[:space:]]+-/ { next } - in_bots && /^[a-zA-Z]/ { in_bots=0 } - { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - fi -fi - -# ======================================== -# Validate Trigger Comment Exists (Based on startup_case and round) -# ======================================== - -# Validate trigger FIRST, before Claude eyes check -# This ensures we don't waste time checking eyes on a stale trigger_comment_id - -if [[ "$REQUIRE_TRIGGER" == "true" && -z "$PR_LAST_TRIGGER_AT" ]]; then - # Determine startup case description for template - STARTUP_CASE_DESC="requires trigger comment" - case "${PR_STARTUP_CASE:-1}" in - 4) STARTUP_CASE_DESC="New commits after all bots reviewed" ;; - 5) STARTUP_CASE_DESC="New commits after partial bot reviews" ;; - *) STARTUP_CASE_DESC="Subsequent round requires trigger" ;; - esac - - FALLBACK_MSG="# Missing Trigger Comment - -No @bot mention found. 
Please run: gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review\"" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/no-trigger-comment.md" "$FALLBACK_MSG" \ - "STARTUP_CASE=${PR_STARTUP_CASE:-1}" "STARTUP_CASE_DESC=$STARTUP_CASE_DESC" \ - "CURRENT_ROUND=$PR_CURRENT_ROUND" "BOT_MENTION_STRING=$PR_BOT_MENTION_STRING") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Missing trigger comment - please @mention bots first" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Claude Eyes Verification (AFTER trigger validation) -# ======================================== - -# Verify Claude eyes ONLY AFTER trigger is confirmed to exist -# This prevents checking eyes on a stale trigger_comment_id -# Conditions: -# 1. Claude is configured AND -# 2. A trigger is actually required (REQUIRE_TRIGGER=true) AND -# 3. A trigger comment ID exists (PR_TRIGGER_COMMENT_ID from confirmed detection above) - -CLAUDE_CONFIGURED=false -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "claude" ]]; then - CLAUDE_CONFIGURED=true - break - fi -done - -if [[ "$CLAUDE_CONFIGURED" == "true" && "$REQUIRE_TRIGGER" == "true" ]]; then - # Use the confirmed trigger comment ID (updated by detect_trigger_comment above) - TRIGGER_ID_TO_CHECK="${PR_TRIGGER_COMMENT_ID:-}" - - if [[ -n "$TRIGGER_ID_TO_CHECK" ]]; then - echo "Verifying Claude eyes reaction on trigger comment (ID: $TRIGGER_ID_TO_CHECK)..." 
>&2 - - # Check for eyes reaction with 3x5s retry - # Pass --pr for fork PR support (reactions are on base repo) - EYES_REACTION=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" claude-eyes "$TRIGGER_ID_TO_CHECK" --pr "$PR_NUMBER" --retry 3 --delay 5 2>/dev/null) || EYES_REACTION="" - - if [[ -z "$EYES_REACTION" || "$EYES_REACTION" == "null" ]]; then - # Claude eyes verification is BLOCKING - error after 3x5s retries - FALLBACK_MSG="# Claude Bot Not Responding - -The Claude bot did not respond with an 'eyes' reaction within 15 seconds (3 x 5s retries). -Please verify the Claude bot is installed and configured for this repository." - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/claude-eyes-timeout.md" "$FALLBACK_MSG" \ - "RETRY_COUNT=3" "TOTAL_WAIT_SECONDS=15") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Claude bot not responding - check bot configuration" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - else - echo "Claude eyes reaction confirmed!" >&2 - fi - else - # Trigger exists (PR_LAST_TRIGGER_AT is set) but no ID - should not happen normally - echo "Warning: Trigger exists but no comment ID for eyes verification" >&2 - fi -elif [[ "$CLAUDE_CONFIGURED" == "true" ]]; then - echo "Claude is configured but trigger not required (startup_case=${PR_STARTUP_CASE:-1}, round=$PR_CURRENT_ROUND) - skipping eyes verification" >&2 -fi - -# ======================================== -# Poll for New Bot Reviews (per-bot tracking) -# ======================================== - -# Poll ALL configured bots, not just active - allows re-adding approved bots if they post new issues -echo "Polling for new bot reviews on PR #$PR_NUMBER..." 
>&2 -echo "Configured bots: $PR_CONFIGURED_BOTS_DISPLAY" >&2 -echo "Active bots: $PR_ACTIVE_BOTS_DISPLAY" >&2 -echo "Poll interval: ${PR_POLL_INTERVAL}s, Timeout: ${PR_POLL_TIMEOUT}s per bot" >&2 - -POLL_SCRIPT="$PLUGIN_ROOT/scripts/poll-pr-reviews.sh" - -# Consistent file naming: round-N files all refer to round N -COMMENT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-comment.md" - -# Get timestamp for filtering based on startup_case and round -# - With trigger: use trigger timestamp (most accurate) -# - Round 0, Case 1: use started_at (waiting for new auto-reviews) -# - Round 0, Case 2/3: use epoch 0 to collect ALL existing comments -# - Round 0, Case 4/5: should have trigger (blocked above if missing) -AFTER_TIMESTAMP="" -USE_ALL_COMMENTS=false - -if [[ -n "$PR_LAST_TRIGGER_AT" ]]; then - # Always use trigger timestamp when available - AFTER_TIMESTAMP="$PR_LAST_TRIGGER_AT" - echo "Round $PR_CURRENT_ROUND: using trigger timestamp for --after: $AFTER_TIMESTAMP" >&2 -elif [[ "$PR_CURRENT_ROUND" -eq 0 ]]; then - case "${PR_STARTUP_CASE:-1}" in - 1) - # Case 1: No comments yet - filter by started_at to wait for new reviews - AFTER_TIMESTAMP="${PR_STARTED_AT}" - echo "Round 0, Case 1: using started_at for --after: $AFTER_TIMESTAMP" >&2 - ;; - 2|3) - # Case 2/3: Existing comments - collect ALL of them (no timestamp filter) - USE_ALL_COMMENTS=true - AFTER_TIMESTAMP="1970-01-01T00:00:00Z" # Epoch 0 to include all comments - echo "Round 0, Case ${PR_STARTUP_CASE}: collecting ALL existing bot comments" >&2 - ;; - *) - # Case 4/5 should have been blocked above, use started_at as fallback - AFTER_TIMESTAMP="${PR_STARTED_AT}" - echo "Round 0, Case ${PR_STARTUP_CASE}: using started_at for --after: $AFTER_TIMESTAMP" >&2 - ;; - esac -else - # Round N>0 with no trigger - this should have been blocked earlier - # but handle defensively by blocking here too - REASON="# Missing Trigger Comment - -No @bot mention comment found from you on this PR. 
- -Before polling for bot reviews, you must comment on the PR to trigger the bots. - -**Please run:** -\`\`\`bash -gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" -\`\`\` - -Then try exiting again." - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Missing trigger comment" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Convert trigger timestamp to epoch for timeout anchoring -# Per-bot timeouts are measured from the TRIGGER time, not poll start time -# Special case: when USE_ALL_COMMENTS is true (startup cases 2/3), we're looking at -# ALL historical comments. In this case, anchor timeout to NOW (poll start time) -# rather than PR_STARTED_AT, which could be hours old and cause instant timeout. -if [[ "$USE_ALL_COMMENTS" == "true" ]]; then - # Use current time as timeout anchor for historical comment review - TRIGGER_EPOCH=$(date +%s) -else - TRIGGER_EPOCH=$(date -d "$AFTER_TIMESTAMP" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$AFTER_TIMESTAMP" +%s 2>/dev/null || date +%s) -fi - -# Track which bots have responded and their individual timeouts -# IMPORTANT: Poll ALL configured bots (not just active) so we can detect when -# previously approved bots post new issues and re-add them to active_bots -# IMPORTANT: Timeouts are anchored to TRIGGER_EPOCH, not poll start time -# This ensures the 15-minute window is measured from when the @mention was posted -# -# NOTE: Using dynamic variable names instead of associative arrays (declare -A) -# for macOS Bash 3.2 compatibility. Associative arrays require Bash 4.0+. 
-# Helper functions to get/set values: -_sanitize_key() { echo "$1" | tr -c 'a-zA-Z0-9_' '_'; } -_map_get() { local var="$1_$(_sanitize_key "$2")"; echo "${!var}"; } -_map_set() { local var="$1_$(_sanitize_key "$2")"; eval "$var=\"$3\""; } -_map_isset() { local var="$1_$(_sanitize_key "$2")"; [[ -n "${!var+x}" ]]; } - -POLL_START_EPOCH=$(date +%s) -echo "Timeout anchor: trigger at epoch $TRIGGER_EPOCH (poll started at $POLL_START_EPOCH)" >&2 -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - _map_set "BOTS_RESPONDED" "$bot" "false" - _map_set "BOTS_TIMED_OUT" "$bot" "false" - # Use TRIGGER_EPOCH for timeout, not poll start - _map_set "BOTS_TIMEOUT_START" "$bot" "$TRIGGER_EPOCH" -done - -# Collect all new comments with deduplication by id -# Using dynamic variables: SEEN_ID_<sanitized_id>=1 -ALL_NEW_COMMENTS="[]" - -while true; do - CURRENT_TIME=$(date +%s) - - # Check if all configured bots have responded OR timed out (per-bot 15min timeout) - ALL_DONE=true - WAITING_BOTS="" - TIMED_OUT_BOTS="" - - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_RESPONDED "$bot")" == "true" ]]; then - continue # Bot already responded - fi - - # Check per-bot timeout (15 minutes each) - auto-remove after timeout - BOT_ELAPSED=$((CURRENT_TIME - $(_map_get BOTS_TIMEOUT_START "$bot"))) - if [[ $BOT_ELAPSED -ge $PR_POLL_TIMEOUT ]]; then - echo "Bot '$bot' timed out after ${PR_POLL_TIMEOUT}s - will be removed from active_bots" >&2 - _map_set "BOTS_TIMED_OUT" "$bot" "true" # Mark as timed out for later removal - if [[ -n "$TIMED_OUT_BOTS" ]]; then - TIMED_OUT_BOTS="${TIMED_OUT_BOTS}, ${bot}" - else - TIMED_OUT_BOTS="$bot" - fi - continue # Mark as done (timed out) - fi - - # Bot still waiting - ALL_DONE=false - if [[ -n "$WAITING_BOTS" ]]; then - WAITING_BOTS="${WAITING_BOTS},${bot}" - else - WAITING_BOTS="$bot" - fi - done - - if [[ "$ALL_DONE" == "true" ]]; then - if [[ -n "$TIMED_OUT_BOTS" ]]; then - echo "Polling complete. 
Timed out bots: $TIMED_OUT_BOTS" >&2 - else - echo "All configured bots have responded!" >&2 - fi - break - fi - - # Check for cancel signal - if [[ -f "$LOOP_DIR/.cancel-requested" ]]; then - echo "Cancel requested, exiting poll loop..." >&2 - exit 0 - fi - - TOTAL_ELAPSED=$((CURRENT_TIME - POLL_START_EPOCH)) - echo "Poll attempt (elapsed: ${TOTAL_ELAPSED}s, waiting for: $WAITING_BOTS)..." >&2 - - # Poll for new comments from bots we're still waiting for - POLL_RESULT=$("$POLL_SCRIPT" "$PR_NUMBER" --after "$AFTER_TIMESTAMP" --bots "$WAITING_BOTS" 2>/dev/null) || { - echo "Warning: Poll script failed, retrying..." >&2 - sleep "$PR_POLL_INTERVAL" - continue - } - - # Check which bots responded (check all configured bots) - # Poll script returns author names (e.g., chatgpt-codex-connector[bot]) - # We need to map them back to bot names (e.g., codex) - RESPONDED_BOTS=$(echo "$POLL_RESULT" | jq -r '.bots_responded[]' 2>/dev/null || true) - for responded_author in $RESPONDED_BOTS; do - # Map author name to bot name (e.g., chatgpt-codex-connector[bot] -> codex) - responded_bot=$(map_author_to_bot "$responded_author") - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$responded_bot" == "$bot" ]]; then - if [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - _map_set "BOTS_RESPONDED" "$bot" "true" - echo "Bot '$bot' has responded!" 
>&2 - fi - fi - done - done - - # Check for Codex +1 reaction during polling (any round) - # Codex may give +1 instead of commenting if no issues found - if [[ "$(_map_get BOTS_RESPONDED codex)" != "true" ]]; then - # Check if codex is a configured bot - CODEX_CONFIGURED=false - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - [[ "$bot" == "codex" ]] && CODEX_CONFIGURED=true && break - done - - if [[ "$CODEX_CONFIGURED" == "true" ]]; then - # Determine timestamp for filtering - use trigger if available, else loop start - POLL_REACTION_AFTER="${PR_LAST_TRIGGER_AT:-$PR_STARTED_AT}" - - # Check for +1 reaction - THUMBSUP_RESULT=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup "$PR_NUMBER" --after "$POLL_REACTION_AFTER" 2>/dev/null) || THUMBSUP_RESULT="" - - if [[ -n "$THUMBSUP_RESULT" && "$THUMBSUP_RESULT" != "null" ]]; then - # +1 found - codex approved without issues - echo "Codex +1 reaction detected during polling - treating as approval!" >&2 - _map_set "BOTS_RESPONDED" "codex" "true" - - # Remove codex from active_bots - declare -a NEW_ACTIVE_BOTS_THUMBSUP=() - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" != "codex" ]]; then - NEW_ACTIVE_BOTS_THUMBSUP+=("$bot") - else - echo "Removing 'codex' from active_bots (approved via +1)" >&2 - fi - done - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_THUMBSUP[@]}") - - # Update active_bots in state file - if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots have approved (codex via +1) - PR loop complete!" 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 - else - # Update state file with remaining bots - ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - # Use awk to replace active_bots section (portable across GNU/BSD) - TEMP_FILE="${STATE_FILE}.thumbsup.$$" - awk -v bots="$ACTIVE_BOTS_YAML" ' - /^active_bots:$/ { - print "active_bots:" bots - skip = 1 - next - } - skip && /^[a-z_]+:/ { skip = 0 } - skip && /^ - / { next } - !skip { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - fi - fi - fi - - # Collect new comments WITH DEDUPLICATION by comment id - NEW_COMMENTS=$(echo "$POLL_RESULT" | jq -r '.comments' 2>/dev/null || echo "[]") - if [[ "$NEW_COMMENTS" != "[]" && "$NEW_COMMENTS" != "null" ]]; then - # Deduplicate: only add comments we haven't seen before - UNIQUE_COMMENTS="[]" - while IFS= read -r comment_json; do - [[ -z "$comment_json" || "$comment_json" == "null" ]] && continue - COMMENT_ID=$(echo "$comment_json" | jq -r '.id // empty') - if [[ -n "$COMMENT_ID" ]] && ! _map_isset "SEEN_COMMENT_IDS" "$COMMENT_ID"; then - _map_set "SEEN_COMMENT_IDS" "$COMMENT_ID" "1" - UNIQUE_COMMENTS=$(echo "$UNIQUE_COMMENTS" | jq --argjson c "$comment_json" '. + [$c]') - fi - done < <(echo "$NEW_COMMENTS" | jq -c '.[]') - - if [[ "$UNIQUE_COMMENTS" != "[]" ]]; then - ALL_NEW_COMMENTS=$(echo "$ALL_NEW_COMMENTS $UNIQUE_COMMENTS" | jq -s 'add') - fi - fi - - sleep "$PR_POLL_INTERVAL" -done - -# ======================================== -# Handle No Responses (auto-remove timed-out bots) -# ======================================== - -COMMENT_COUNT=$(echo "$ALL_NEW_COMMENTS" | jq 'length' 2>/dev/null || echo "0") - -if [[ "$COMMENT_COUNT" == "0" ]]; then - echo "No new bot reviews received." 
>&2 - - # Always remove timed-out bots from active_bots (per-bot timeout behavior) - # Don't wait for ALL bots to timeout - remove each bot as it times out - TIMED_OUT_COUNT=0 - WAITING_COUNT=0 - declare -a NEW_ACTIVE_BOTS_TIMEOUT=() - - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_TIMED_OUT "$bot")" == "true" ]]; then - TIMED_OUT_COUNT=$((TIMED_OUT_COUNT + 1)) - echo "Removing '$bot' from active_bots (timed out after ${PR_POLL_TIMEOUT}s)" >&2 - # Don't add to NEW_ACTIVE_BOTS_TIMEOUT - elif [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - WAITING_COUNT=$((WAITING_COUNT + 1)) - NEW_ACTIVE_BOTS_TIMEOUT+=("$bot") - else - # Bot responded - keep in active (will be processed if comments come in) - NEW_ACTIVE_BOTS_TIMEOUT+=("$bot") - fi - done - - # If any bots timed out, update the state file with remaining active bots - if [[ $TIMED_OUT_COUNT -gt 0 ]]; then - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_TIMEOUT[@]}") - - # If no bots remain, loop is complete - if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots removed (timed out) - PR loop approved!" 
>&2 - # Build configured_bots YAML - TIMEOUT_CONFIGURED_BOTS_YAML=$(build_yaml_list "${PR_CONFIGURED_BOTS_ARRAY[@]}") - # Write updated state with empty active_bots before moving to approve-state.md - { - echo "---" - echo "current_round: $PR_CURRENT_ROUND" - echo "max_iterations: $PR_MAX_ITERATIONS" - echo "pr_number: $PR_NUMBER" - echo "start_branch: $PR_START_BRANCH" - echo "configured_bots:${TIMEOUT_CONFIGURED_BOTS_YAML}" - echo "active_bots:" - echo "codex_model: $PR_CODEX_MODEL" - echo "codex_effort: $PR_CODEX_EFFORT" - echo "codex_timeout: $PR_CODEX_TIMEOUT" - echo "poll_interval: $PR_POLL_INTERVAL" - echo "poll_timeout: $PR_POLL_TIMEOUT" - echo "started_at: $PR_STARTED_AT" - echo "startup_case: ${PR_STARTUP_CASE:-1}" - echo "latest_commit_sha: ${PR_LATEST_COMMIT_SHA:-}" - echo "latest_commit_at: ${PR_LATEST_COMMIT_AT:-}" - echo "last_trigger_at: ${PR_LAST_TRIGGER_AT:-}" - echo "trigger_comment_id: ${PR_TRIGGER_COMMENT_ID:-}" - echo "---" - } > "$LOOP_DIR/approve-state.md" - rm -f "$STATE_FILE" - exit 0 - fi - - # Persist updated active_bots to state file (some bots timed out, others still waiting) - echo "Updating state file with ${#PR_ACTIVE_BOTS_ARRAY[@]} remaining active bots" >&2 - TIMEOUT_ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - TEMP_FILE="${STATE_FILE}.timeout.$$" - awk -v bots="$TIMEOUT_ACTIVE_BOTS_YAML" ' - /^active_bots:$/ { - print "active_bots:" bots - skip = 1 - next - } - skip && /^[a-z_]+:/ { skip = 0 } - skip && /^ - / { next } - !skip { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - - # Build list of bots that didn't respond (only non-timed-out bots that are still waiting) - MISSING_BOTS="" - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - if [[ -n "$MISSING_BOTS" ]]; then - MISSING_BOTS="${MISSING_BOTS}, ${bot}" - else - MISSING_BOTS="$bot" - fi - fi - done - - REASON="# Bot Review Timeout - -No new reviews received from 
bots after polling. - -**Bots that did not respond:** $MISSING_BOTS - -This might mean: -- The bots haven't been triggered (did you comment on the PR?) -- The bots are slow to respond -- The bots are not enabled on this repository - -**Options:** -1. Comment on the PR to trigger bot reviews: - \`\`\`bash - gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" - \`\`\` -2. Wait and try exiting again -3. Cancel the loop: \`/humanize:cancel-pr-loop\`" - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Bot review timeout" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Save New Comments (correct file naming) -# ======================================== - -# Format comments grouped by bot (use configured bots for completeness) -cat > "$COMMENT_FILE" << EOF -# Bot Reviews (Round $NEXT_ROUND) - -Fetched at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -Configured bots: $PR_CONFIGURED_BOTS_DISPLAY -Currently active: $PR_ACTIVE_BOTS_DISPLAY - ---- - -EOF - -# Group comments by ALL configured bots (not just active) -# This allows Codex to see when previously approved bots post new issues -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - # Map bot name to author name (e.g., codex -> chatgpt-codex-connector[bot]) - author=$(map_bot_to_author "$bot") - BOT_COMMENTS=$(echo "$ALL_NEW_COMMENTS" | jq -r --arg author "$author" ' - [.[] | select(.author == $author)] - ') - BOT_COUNT=$(echo "$BOT_COMMENTS" | jq 'length') - - if [[ "$BOT_COUNT" -gt 0 ]]; then - echo "## Comments from ${author}" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - - echo "$BOT_COMMENTS" | jq -r ' - .[] | - "### Comment\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' 
>> "$COMMENT_FILE" - else - echo "## Comments from ${author}" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - echo "*No new comments from this bot.*" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - echo "---" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - fi -done - -echo "Comments saved to: $COMMENT_FILE" >&2 - -# ======================================== -# Run Local Codex Review of Bot Feedback -# ======================================== - -# Consistent file naming: all round-N files refer to round N -CHECK_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-check.md" -FEEDBACK_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-feedback.md" - -echo "Running local Codex review of bot feedback..." >&2 - -# Build Codex prompt with per-bot analysis -CODEX_PROMPT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-codex-prompt.md" -BOT_REVIEW_CONTENT=$(cat "$COMMENT_FILE") - -# Build list of expected bots for Codex (all configured bots) -EXPECTED_BOTS_LIST="" -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - EXPECTED_BOTS_LIST="${EXPECTED_BOTS_LIST}- ${bot}\n" -done - -# Load goal tracker update template (with fallback) -GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" -GOAL_TRACKER_TEMPLATE_VARS=( - "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" - "NEXT_ROUND=$NEXT_ROUND" -) -GOAL_TRACKER_UPDATE_FALLBACK="## Goal Tracker Update -After analysis, update the goal tracker at $GOAL_TRACKER_FILE with current status." - -GOAL_TRACKER_UPDATE_INSTRUCTIONS=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/codex-goal-tracker-update.md" "$GOAL_TRACKER_UPDATE_FALLBACK" "${GOAL_TRACKER_TEMPLATE_VARS[@]}") - -cat > "$CODEX_PROMPT_FILE" << EOF -# PR Review Validation (Per-Bot Analysis) - -Analyze the following bot reviews and determine approval status FOR EACH BOT. - -## Expected Bots -$(echo -e "$EXPECTED_BOTS_LIST") - -## Bot Reviews -$BOT_REVIEW_CONTENT - -## Your Task - -1. For EACH expected bot, analyze their review (if present) -2. 
Determine if each bot is: - - **APPROVE**: Bot explicitly approves or says "no issues found", "LGTM", "Didn't find any major issues", etc. - - **ISSUES**: Bot identifies specific problems that need fixing - - **NO_RESPONSE**: Bot did not post any new comments - -3. Output your analysis to $CHECK_FILE with this EXACT structure: - -### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| <bot_name> | APPROVE/ISSUES/NO_RESPONSE | <brief summary> | - -### Issues Found (if any) -List ALL specific issues from bots that have ISSUES status. - -### Approved Bots (to remove from active_bots) -List bots that should be removed from active tracking (those with APPROVE status). - -### Final Recommendation -- If ALL bots have APPROVE status: End with "APPROVE" on its own line -- If any bot has ISSUES status: End with "ISSUES_REMAINING" on its own line -- If any bot has NO_RESPONSE status: End with "WAITING_FOR_BOTS" on its own line -- If any bot response indicates usage/rate limits hit (e.g., "usage limits", "rate limit", "quota exceeded"): End with "USAGE_LIMIT_HIT" on its own line - -$GOAL_TRACKER_UPDATE_INSTRUCTIONS -EOF - -# Check if codex is available -if ! command -v codex &>/dev/null; then - REASON="# Codex Not Found - -The 'codex' command is not installed or not in PATH. -PR loop requires Codex CLI to validate bot reviews. - -**To fix:** -1. Install Codex CLI -2. Retry the exit - -Or use \`/humanize:cancel-pr-loop\` to cancel the loop." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex not found" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Run Codex -CODEX_ARGS=("-m" "$PR_CODEX_MODEL") -if [[ -n "$PR_CODEX_EFFORT" ]]; then - CODEX_ARGS+=("-c" "model_reasoning_effort=${PR_CODEX_EFFORT}") -fi - -# Determine automation flag based on environment variable -# Default: Use --full-auto (safe mode with sandbox) -# If HUMANIZE_CODEX_BYPASS_SANDBOX is "true" or "1": Use --dangerously-bypass-approvals-and-sandbox -CODEX_AUTO_FLAG="--full-auto" -if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "1" ]]; then - CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" -fi - -CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") - -CODEX_PROMPT_CONTENT=$(cat "$CODEX_PROMPT_FILE") -CODEX_EXIT_CODE=0 - -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex exec "${CODEX_ARGS[@]}" - \ - > "$CHECK_FILE" 2>/dev/null || CODEX_EXIT_CODE=$? - -if [[ $CODEX_EXIT_CODE -ne 0 ]]; then - REASON="# Codex Review Failed - -Codex failed to validate bot reviews (exit code: $CODEX_EXIT_CODE). - -Please retry or cancel the loop." - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex review failed" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -if [[ ! -s "$CHECK_FILE" ]]; then - REASON="# Codex Review Empty - -Codex produced no output when validating bot reviews. - -Please retry or cancel the loop." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex review empty" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Check Codex Result and Update active_bots -# ======================================== - -CHECK_CONTENT=$(cat "$CHECK_FILE") -LAST_LINE=$(echo "$CHECK_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) -LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - -# Use "APPROVE" marker to indicate all bots approved -if [[ "$LAST_LINE_TRIMMED" == "APPROVE" ]]; then - echo "All bots have approved! PR loop complete." >&2 - - # Update goal tracker BEFORE exit (idempotent - won't duplicate if Codex already updated) - if [[ -f "$GOAL_TRACKER_FILE" ]]; then - # For APPROVE, we record 0 new issues - update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$NEXT_ROUND" '{"issues": 0, "resolved": 0, "bot": "All"}' || true - fi - - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# Handle WAITING_FOR_BOTS - block exit but don't advance round -if [[ "$LAST_LINE_TRIMMED" == "WAITING_FOR_BOTS" ]]; then - echo "Some bots haven't responded yet. Blocking exit." >&2 - - REASON="# Waiting for Bot Responses - -Some bots haven't posted their reviews yet. - -**Options:** -1. Wait and try exiting again (bots may still be processing) -2. Comment on the PR to trigger bot reviews: - \`\`\`bash - gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" - \`\`\` -3. Cancel the loop: \`/humanize:cancel-pr-loop\` - -**Note:** The round counter will NOT advance until all expected bots respond." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Waiting for bot responses" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Handle USAGE_LIMIT_HIT - terminate loop gracefully (service limitation, not code issue) -if [[ "$LAST_LINE_TRIMMED" == "USAGE_LIMIT_HIT" ]]; then - echo "Bot usage/rate limits detected. Terminating PR loop." >&2 - - # Move state file to indicate usage limit termination - mv "$STATE_FILE" "$LOOP_DIR/usage-limit-state.md" - - # Let exit proceed without blocking - the loop is over due to external limitation - exit 0 -fi - -# ======================================== -# Update active_bots in state file -# ======================================== - -# Extract approved bots from Codex output and remove them from active_bots -# Look for "### Approved Bots" section -# NOTE: Use awk for more robust extraction that handles: -# - Section at end of file (no following ###) -# - Section immediately followed by ### (empty section) -APPROVED_SECTION=$(awk '/^### Approved Bots/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" || true) - -# Extract bots with issues from Codex output (for re-add logic) -# Look for "### Per-Bot Status" table and find bots with ISSUES status -# NOTE: Use awk for more robust extraction -ISSUES_SECTION=$(awk '/^### Per-Bot Status/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" || true) - -# Build new active_bots array with re-add logic -# IMPORTANT: Process ALL configured bots, not just currently active ones -# This allows re-adding bots that were previously approved but now have new issues -declare -a NEW_ACTIVE_BOTS=() -# NOTE: Using _map_set/get instead of declare -A for macOS Bash 3.2 compatibility - -# First, identify bots with issues from Codex output -while IFS= read -r line; do - if echo "$line" | grep -qiE '\|[[:space:]]*ISSUES[[:space:]]*\|'; then - # Extract bot name from table row: | botname | ISSUES | summary | - BOT_WITH_ISSUE=$(echo "$line" | 
sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - if [[ -n "$BOT_WITH_ISSUE" ]]; then - _map_set "BOTS_WITH_ISSUES" "$BOT_WITH_ISSUE" "true" - fi - fi - if echo "$line" | grep -qiE '\|[[:space:]]*APPROVE[[:space:]]*\|'; then - # Extract bot name from table row: | botname | APPROVE | summary | - BOT_APPROVED=$(echo "$line" | sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - if [[ -n "$BOT_APPROVED" ]]; then - _map_set "BOTS_APPROVED" "$BOT_APPROVED" "true" - fi - fi -done <<< "$ISSUES_SECTION" - -# Process ALL configured bots (not just currently active) -# This allows re-adding previously approved bots if they post new issues -# Also handle timed-out bots by removing them from active_bots -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - # Check if bot timed out - remove from active_bots - if [[ "$(_map_get BOTS_TIMED_OUT "$bot")" == "true" ]]; then - echo "Removing '$bot' from active_bots (timed out after ${PR_POLL_TIMEOUT}s)" >&2 - continue # Don't add to NEW_ACTIVE_BOTS - fi - - if [[ "$(_map_get BOTS_WITH_ISSUES "$bot")" == "true" ]]; then - # Bot has issues - add to active list - if [[ "$(_map_get BOTS_APPROVED "$bot")" == "true" ]]; then - echo "Bot '$bot' was previously approved but has new issues - re-adding to active" >&2 - else - echo "Bot '$bot' has issues - keeping active" >&2 - fi - NEW_ACTIVE_BOTS+=("$bot") - elif [[ "$(_map_get BOTS_APPROVED "$bot")" == "true" ]]; then - # Bot approved with no new issues - remove from active - echo "Removing '$bot' from active_bots (approved)" >&2 - elif echo "$APPROVED_SECTION" | grep -qi "$bot"; then - # Bot mentioned in approved section - remove - echo "Removing '$bot' from active_bots (in approved section)" >&2 - else - # Bot not mentioned in ISSUES or APPROVE - check if was active - WAS_ACTIVE=false - for active_bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "$active_bot" ]]; then - WAS_ACTIVE=true - break - fi - done - if [[ "$WAS_ACTIVE" == "true" ]]; then - # Was active, not mentioned - keep active 
(NO_RESPONSE case) - echo "Bot '$bot' not mentioned - keeping active" >&2 - NEW_ACTIVE_BOTS+=("$bot") - fi - fi -done - -# Update state file with new active_bots and incremented round -TEMP_FILE="${STATE_FILE}.tmp.$$" - -# Build new YAML list for active_bots -NEW_ACTIVE_BOTS_YAML=$(build_yaml_list "${NEW_ACTIVE_BOTS[@]}") - -# ======================================== -# Update PR Goal Tracker -# ======================================== -# Extract issue counts from Codex output and update goal tracker -# Count issues by looking at the Issues Found section -ISSUES_FOUND_COUNT=0 -ISSUES_RESOLVED_COUNT=0 - -# Count issues in the "### Issues Found" section -if grep -q "### Issues Found" "$CHECK_FILE" 2>/dev/null; then - # Count list items: numbered (1., 2.) or bullet (-, *) in Issues Found section - # NOTE: Use awk for robust extraction (handles section at end of file) - ISSUES_FOUND_COUNT=$(awk '/^### Issues Found/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" \ - | grep -cE '^[0-9]+\.|^- |^\* ' 2>/dev/null || echo "0") -fi - -# Count resolved issues: issues are only resolved when ALL bots approve -# NOTE: If we reach this point, not all bots have approved (full APPROVE case -# already triggered early exit above), so issues found in this round are NOT -# resolved yet. Setting resolved=0 prevents inflating the resolved count when -# only some bots approve while others report issues. -# ISSUES_RESOLVED_COUNT stays 0 - issues will be marked resolved in a future -# round when all bots approve and the early exit path records the resolution. 
- -# Call update_pr_goal_tracker if goal tracker exists -if [[ -f "$GOAL_TRACKER_FILE" ]]; then - # NOTE: Use lowercase "codex" to match configured bot names and avoid duplicate rows - # (Codex itself writes rows with lowercase names in goal tracker) - BOT_RESULTS_JSON="{\"bot\": \"codex\", \"issues\": $ISSUES_FOUND_COUNT, \"resolved\": $ISSUES_RESOLVED_COUNT}" - update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$NEXT_ROUND" "$BOT_RESULTS_JSON" || true -fi - -# Build YAML list for configured_bots (never changes) -CONFIGURED_BOTS_YAML=$(build_yaml_list "${PR_CONFIGURED_BOTS_ARRAY[@]}") - -# Update latest_commit_sha to current HEAD (for force push detection in next round) -NEW_LATEST_COMMIT_SHA=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD 2>/dev/null) || NEW_LATEST_COMMIT_SHA="$PR_LATEST_COMMIT_SHA" -# NOTE: Sort by committedDate before selecting last - API order is not guaranteed -# NOTE: Uses PR_LOOKUP_REPO for fork PR support -NEW_LATEST_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || NEW_LATEST_COMMIT_AT="$PR_LATEST_COMMIT_AT" - -# Re-evaluate startup_case dynamically -# This allows case to change as bot comments arrive -BOTS_COMMA_LIST=$(IFS=','; echo "${PR_CONFIGURED_BOTS_ARRAY[*]}") -NEW_REVIEWER_STATUS=$("$PLUGIN_ROOT/scripts/check-pr-reviewer-status.sh" "$PR_NUMBER" --bots "$BOTS_COMMA_LIST" 2>/dev/null) || NEW_REVIEWER_STATUS="" -if [[ -n "$NEW_REVIEWER_STATUS" ]]; then - NEW_STARTUP_CASE=$(echo "$NEW_REVIEWER_STATUS" | jq -r '.case') - if [[ -n "$NEW_STARTUP_CASE" && "$NEW_STARTUP_CASE" != "null" ]]; then - if [[ "$NEW_STARTUP_CASE" != "${PR_STARTUP_CASE:-1}" ]]; then - echo "Startup case changed: ${PR_STARTUP_CASE:-1} -> $NEW_STARTUP_CASE" >&2 - fi - PR_STARTUP_CASE="$NEW_STARTUP_CASE" - fi -fi - -# Create updated state file (with last_trigger_at cleared - will be set when next @mention posted) -{ - echo "---" 
- echo "current_round: $NEXT_ROUND" - echo "max_iterations: $PR_MAX_ITERATIONS" - echo "pr_number: $PR_NUMBER" - echo "start_branch: $PR_START_BRANCH" - echo "configured_bots:${CONFIGURED_BOTS_YAML}" - echo "active_bots:${NEW_ACTIVE_BOTS_YAML}" - echo "codex_model: $PR_CODEX_MODEL" - echo "codex_effort: $PR_CODEX_EFFORT" - echo "codex_timeout: $PR_CODEX_TIMEOUT" - echo "poll_interval: $PR_POLL_INTERVAL" - echo "poll_timeout: $PR_POLL_TIMEOUT" - echo "started_at: $PR_STARTED_AT" - echo "startup_case: ${PR_STARTUP_CASE:-1}" - echo "latest_commit_sha: $NEW_LATEST_COMMIT_SHA" - echo "latest_commit_at: ${NEW_LATEST_COMMIT_AT:-}" - echo "last_trigger_at:" - echo "trigger_comment_id: ${PR_TRIGGER_COMMENT_ID:-}" - echo "---" -} > "$TEMP_FILE" -mv "$TEMP_FILE" "$STATE_FILE" - -# Check if all bots are now approved -if [[ ${#NEW_ACTIVE_BOTS[@]} -eq 0 ]]; then - echo "All bots have now approved! PR loop complete." >&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# ======================================== -# Issues Remaining - Continue Loop -# ======================================== - -# Build new bot mention string -NEW_BOT_MENTION_STRING=$(build_bot_mention_string "${NEW_ACTIVE_BOTS[@]}") - -# Create feedback file for next round -cat > "$FEEDBACK_FILE" << EOF -# PR Loop Feedback (Round $NEXT_ROUND) - -## Bot Review Analysis - -$CHECK_CONTENT - ---- - -## Your Task - -Address the issues identified above: - -1. Read and understand each issue -2. Make the necessary code changes -3. Commit and push your changes -4. Comment on the PR to trigger re-review: - \`\`\`bash - gh pr comment $PR_NUMBER --body "$NEW_BOT_MENTION_STRING please review the latest changes" - \`\`\` -5. 
Write your resolution summary to: $LOOP_DIR/round-${NEXT_ROUND}-pr-resolve.md - ---- - -**Remaining active bots:** $(IFS=', '; echo "${NEW_ACTIVE_BOTS[*]}") -**Round:** $NEXT_ROUND of $PR_MAX_ITERATIONS -EOF - -SYSTEM_MSG="PR Loop: Round $NEXT_ROUND/$PR_MAX_ITERATIONS - Bot reviews identified issues" - -jq -n \ - --arg reason "$(cat "$FEEDBACK_FILE")" \ - --arg msg "$SYSTEM_MSG" \ - '{ - "decision": "block", - "reason": $reason, - "systemMessage": $msg - }' - -exit 0 diff --git a/prompt-template/block/finalize-contract-access.md b/prompt-template/block/finalize-contract-access.md new file mode 100644 index 00000000..7b757d9d --- /dev/null +++ b/prompt-template/block/finalize-contract-access.md @@ -0,0 +1,7 @@ +# Finalize Contract Access Blocked + +There is no active `round-N-contract.md` during the Finalize Phase. + +- Do not {{ACTION}} historical round contract files. +- Use `finalize-summary.md` for finalize-only notes. +- Use `goal-tracker.md` if you need the current mainline/backlog state. diff --git a/prompt-template/block/force-push-detected.md b/prompt-template/block/force-push-detected.md deleted file mode 100644 index b09feb50..00000000 --- a/prompt-template/block/force-push-detected.md +++ /dev/null @@ -1,17 +0,0 @@ -# Force Push Detected - -A **force push** has been detected on this PR. The commit SHA changed from `{{OLD_COMMIT}}` to `{{NEW_COMMIT}}` in a non-fast-forward manner. - -Force pushes reset the review state because the commit history has been rewritten. - -**Required Actions**: -1. The PR loop has updated its tracking to the new commit SHA -2. You must post a new trigger comment to restart the review cycle -3. Post a comment mentioning {{BOT_MENTION_STRING}} to trigger a new review - -**Example trigger comment**: -``` -{{BOT_MENTION_STRING}} Please review these changes. -``` - -After posting a trigger comment, you may attempt to continue. 
diff --git a/prompt-template/block/git-not-clean.md b/prompt-template/block/git-not-clean.md index 300e7fb8..f7fdfaf5 100644 --- a/prompt-template/block/git-not-clean.md +++ b/prompt-template/block/git-not-clean.md @@ -5,10 +5,12 @@ You are trying to stop, but you have **{{GIT_ISSUES}}**. **Required Actions**: 0. If the `code-simplifier` plugin is installed, use it to review and simplify your code before committing. Invoke via: `/code-simplifier`, `@agent-code-simplifier`, or `@code-simplifier:code-simplifier (agent)` 1. Review untracked files - add build artifacts to `.gitignore` -2. Stage real changes: `git add <files>` (or `git add -A` if all files should be tracked) +2. Stage only real changes with specific paths: `git add <files>` 3. Commit with a descriptive message following project conventions **Important Rules**: +- Do NOT use `git add -A`, `git add --all`, or `git add .` during an active RLCR loop +- Never stage `.humanize/` or legacy `.humanize-*` loop artifacts - Commit message must follow project conventions - AI tools (Claude, Codex, etc.) must NOT have authorship in commits - Do NOT include `Co-Authored-By: Claude` or similar AI attribution diff --git a/prompt-template/block/git-tracked-humanize.md b/prompt-template/block/git-tracked-humanize.md new file mode 100644 index 00000000..3a7b36df --- /dev/null +++ b/prompt-template/block/git-tracked-humanize.md @@ -0,0 +1,19 @@ +# Tracked Humanize State Blocked + +Detected tracked or staged files under `.humanize/`. + +These files are local Humanize loop state and must remain outside version control. + +## Required Fix + +1. Remove Humanize state from the index: + + git rm --cached -r .humanize + +2. Keep only real project files staged. +3. Retry the stop action after the local state is no longer tracked. + +## Important + +- Do NOT use `git add -f` on Humanize state files. +- Do NOT commit RLCR trackers, round summaries, contracts, or cancel/finalize markers. 
diff --git a/prompt-template/block/goal-tracker-modification.md b/prompt-template/block/goal-tracker-modification.md index f7f26384..30c77835 100644 --- a/prompt-template/block/goal-tracker-modification.md +++ b/prompt-template/block/goal-tracker-modification.md @@ -1,25 +1,14 @@ -# Goal Tracker Modification Blocked (Round {{CURRENT_ROUND}}) +# Goal Tracker Update Blocked (Round {{CURRENT_ROUND}}) -After Round 0, **only Codex can modify the Goal Tracker**. +After Round 0, you may update only the **MUTABLE SECTION** of the active goal tracker. -You CANNOT directly modify `goal-tracker.md` via Write, Edit, or Bash commands. +Use Write or Edit on: +`{{CORRECT_PATH}}` -## How to Request Changes +## Rules -Include a **"Goal Tracker Update Request"** section in your summary file: -`{{SUMMARY_FILE}}` +- Keep the **IMMUTABLE SECTION** unchanged +- Do not modify `goal-tracker.md` via Bash +- Do not write to an old loop session's tracker -Use this format: -```markdown -## Goal Tracker Update Request - -### Requested Changes: -- [E.g., "Mark Task X as completed with evidence: tests pass"] -- [E.g., "Add to Open Issues: discovered Y needs addressing"] -- [E.g., "Plan Evolution: changed approach from A to B because..."] - -### Justification: -[Explain why these changes are needed and how they serve the Ultimate Goal] -``` - -Codex will review your request and update the Goal Tracker if the changes are justified. +If you need Codex to correct tracker drift that you could not safely resolve yourself, include an optional `Goal Tracker Update Request` in your summary. diff --git a/prompt-template/block/mainline-drift-stop.md b/prompt-template/block/mainline-drift-stop.md new file mode 100644 index 00000000..2b7cc53d --- /dev/null +++ b/prompt-template/block/mainline-drift-stop.md @@ -0,0 +1,14 @@ +# Mainline Drift Circuit Breaker + +The RLCR loop has been stopped because the implementation failed to advance the mainline for **{{STALL_COUNT}} consecutive rounds**. 
+ +- Last mainline verdict: `{{LAST_VERDICT}}` +- Plan anchor: `{{PLAN_FILE}}` +- Drift status: `replan_required` + +This loop should not continue automatically. + +Next action: +1. Re-read the original plan +2. Identify why recent rounds kept stalling or regressing +3. Start a fresh RLCR loop with a narrower recovered mainline objective diff --git a/prompt-template/block/mainline-verdict-missing.md b/prompt-template/block/mainline-verdict-missing.md new file mode 100644 index 00000000..bf822e53 --- /dev/null +++ b/prompt-template/block/mainline-verdict-missing.md @@ -0,0 +1,13 @@ +# Mainline Verdict Missing + +The implementation review output is missing the required line: + +`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED` + +Humanize cannot safely update the drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. + +Files: +- Review result: {{REVIEW_RESULT_FILE}} +- Review prompt: {{REVIEW_PROMPT_FILE}} diff --git a/prompt-template/block/methodology-analysis-state-file-modification.md b/prompt-template/block/methodology-analysis-state-file-modification.md new file mode 100644 index 00000000..16020c14 --- /dev/null +++ b/prompt-template/block/methodology-analysis-state-file-modification.md @@ -0,0 +1,9 @@ +# Methodology Analysis State File Modification Blocked + +You cannot modify `methodology-analysis-state.md`. This file is managed by the loop system during the Methodology Analysis Phase. + +The Methodology Analysis Phase runs before the loop fully exits. Focus on: +1. Spawning an Opus agent to analyze development records +2. Reviewing the sanitized analysis report +3. Optionally helping the user file a GitHub issue with improvement suggestions +4. 
Writing your completion marker to `methodology-analysis-done.md` diff --git a/prompt-template/block/no-trigger-comment.md b/prompt-template/block/no-trigger-comment.md deleted file mode 100644 index 9f886f8e..00000000 --- a/prompt-template/block/no-trigger-comment.md +++ /dev/null @@ -1,17 +0,0 @@ -# No Trigger Comment Found - -The PR loop is waiting for a **trigger comment** before it can proceed. - -**Why is this required?** -- Startup case {{STARTUP_CASE}}: {{STARTUP_CASE_DESC}} -- Round {{CURRENT_ROUND}} requires explicit trigger to start the review cycle - -**Required Action**: -Post a comment on the PR mentioning {{BOT_MENTION_STRING}} to trigger a review. - -**Example trigger comment**: -``` -{{BOT_MENTION_STRING}} Please review these changes. -``` - -After posting a trigger comment, you may attempt to continue. diff --git a/prompt-template/block/pr-loop-prompt-write.md b/prompt-template/block/pr-loop-prompt-write.md deleted file mode 100644 index 625629ee..00000000 --- a/prompt-template/block/pr-loop-prompt-write.md +++ /dev/null @@ -1,9 +0,0 @@ -# PR Loop File Write Blocked - -You cannot write to `round-*-pr-comment.md` or `round-*-prompt.md` files in `.humanize/pr-loop/`. - -These files are generated by the PR loop system: -- `round-*-pr-comment.md`: Contains PR comments fetched from GitHub -- `round-*-prompt.md`: Contains instructions for the current round - -Both are read-only and managed by the system. diff --git a/prompt-template/block/pr-loop-state-modification.md b/prompt-template/block/pr-loop-state-modification.md deleted file mode 100644 index 5e17692d..00000000 --- a/prompt-template/block/pr-loop-state-modification.md +++ /dev/null @@ -1,12 +0,0 @@ -# PR Loop State File Modification Blocked - -You cannot modify `state.md` in `.humanize/pr-loop/`. This file is managed by the PR loop system. 
- -The state file contains: -- Current round number -- PR number and branch -- Active bots configuration -- Codex configuration -- Polling settings - -Modifying it would corrupt the PR loop state. diff --git a/prompt-template/block/round-contract-bash-write.md b/prompt-template/block/round-contract-bash-write.md new file mode 100644 index 00000000..bc012ffd --- /dev/null +++ b/prompt-template/block/round-contract-bash-write.md @@ -0,0 +1,7 @@ +# Round Contract Bash Write Blocked + +Do not use Bash commands to modify round contract files. + +Use the `Write` or `Edit` tool instead: + +`{{CORRECT_PATH}}` diff --git a/prompt-template/block/round-contract-missing.md b/prompt-template/block/round-contract-missing.md new file mode 100644 index 00000000..35a822b1 --- /dev/null +++ b/prompt-template/block/round-contract-missing.md @@ -0,0 +1,13 @@ +# Round Contract Missing + +Before you try to exit this round, write the current round contract to: +`{{ROUND_CONTRACT_FILE}}` + +The round contract must restate: +- The single **mainline objective** for this round +- The target ACs +- Which issues are truly **blocking** +- Which issues are **queued** and out of scope +- The concrete success criteria for this round + +Do not continue without a round contract. The loop uses it to prevent goal drift. diff --git a/prompt-template/block/wrong-contract-location.md b/prompt-template/block/wrong-contract-location.md new file mode 100644 index 00000000..04060c5f --- /dev/null +++ b/prompt-template/block/wrong-contract-location.md @@ -0,0 +1,5 @@ +# Wrong Round Contract Location + +Round contract files MUST be in the active loop directory. + +**Correct path**: `{{CORRECT_PATH}}` diff --git a/prompt-template/claude/drift-replan-prompt.md b/prompt-template/claude/drift-replan-prompt.md new file mode 100644 index 00000000..a5970c59 --- /dev/null +++ b/prompt-template/claude/drift-replan-prompt.md @@ -0,0 +1,68 @@ +Your work is not finished. Read and execute the below with ultrathink. 
+ +## Drift Recovery Mode + +Codex judged the recent implementation rounds as failing to advance the mainline. + +- Consecutive stalled/regressed rounds: {{STALL_COUNT}} +- Last mainline verdict: {{LAST_MAINLINE_VERDICT}} + +This round is a **drift recovery round**. Do not continue with normal issue-clearing behavior. + +## Original Implementation Plan + +**IMPORTANT**: Re-anchor on the original plan first: +@{{PLAN_FILE}} + +## Required Recovery Re-anchor + +Before changing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the recent round summaries and review results that led here +- Rewrite the round contract at @{{ROUND_CONTRACT_FILE}} + +Your recovery contract must contain: +- Exactly one recovered **mainline objective** +- The 1-2 target ACs that prove mainline progress this round +- The root cause of recent drift or stagnation +- Which issues are truly **blocking** the recovered mainline objective +- Which issues remain **queued** and explicitly out of scope +- Concrete success criteria that would change the verdict back to `ADVANCED` + +Do not start implementation until the recovery contract exists. 
+ +## Task Lane Rules + +Use the Task system (TaskCreate, TaskUpdate, TaskList) with one required tag per task: +- `[mainline]` for plan-derived work that directly advances the recovered objective +- `[blocking]` for issues that prevent the recovered mainline objective from succeeding safely +- `[queued]` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- This round must prove mainline movement, not just reduce noise +- `[blocking]` work is allowed only when it directly unblocks the recovered mainline objective +- `[queued]` work must stay documented but must NOT replace the recovered objective +- If a new issue does not block the recovered objective, tag it `[queued]` and keep moving on mainline work + +--- +Below is Codex's review result: +<!-- CODEX's REVIEW RESULT START --> +{{REVIEW_CONTENT}} +<!-- CODEX's REVIEW RESULT END --> +--- + +## Goal Tracker Reference + +Before starting work, **read and update** @{{GOAL_TRACKER_FILE}} as needed: +- Keep the immutable section unchanged +- Record the drift/stagnation cause in the mutable section if it changed planning +- Keep blocking vs queued issue classification accurate +- Ensure the tracker and contract now describe the same recovered mainline objective + +## Recovery Guardrails + +- Do not spend this round mostly on queued cleanup +- Do not broaden scope to compensate for previous stalls +- If the original approach was flawed, log the plan evolution explicitly instead of silently changing direction +- If you cannot produce a credible recovered mainline objective, say so in the summary with concrete blockers diff --git a/prompt-template/claude/finalize-phase-prompt.md b/prompt-template/claude/finalize-phase-prompt.md index 4d1c584b..2ee14176 100644 --- a/prompt-template/claude/finalize-phase-prompt.md +++ b/prompt-template/claude/finalize-phase-prompt.md @@ -40,9 +40,10 @@ The code-simplifier agent should focus on: ## Before Exiting -1. 
Complete all tasks (mark them as completed using TaskUpdate with status "completed") -2. Commit your changes with a descriptive message -3. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** +1. Complete all `[mainline]` and `[blocking]` tasks (mark them as completed using TaskUpdate with status "completed") +2. `[queued]` tasks may remain only if they are documented as non-blocking follow-up work +3. Commit your changes with a descriptive message +4. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** Your summary should include: - What simplifications were made diff --git a/prompt-template/claude/finalize-phase-skipped-prompt.md b/prompt-template/claude/finalize-phase-skipped-prompt.md index 654fabdb..5cb01c3c 100644 --- a/prompt-template/claude/finalize-phase-skipped-prompt.md +++ b/prompt-template/claude/finalize-phase-skipped-prompt.md @@ -39,9 +39,10 @@ These constraints are **non-negotiable**: ## Before Exiting -1. Complete all tasks (mark them as completed using TaskUpdate with status "completed") -2. Commit your changes with a descriptive message -3. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** +1. Complete all `[mainline]` and `[blocking]` tasks (mark them as completed using TaskUpdate with status "completed") +2. `[queued]` tasks may remain only if they are documented as non-blocking follow-up work +3. Commit your changes with a descriptive message +4. 
Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** Your summary should include: - What work was done diff --git a/prompt-template/claude/goal-tracker-update-request.md b/prompt-template/claude/goal-tracker-update-request.md index 4c00d483..b685fd51 100644 --- a/prompt-template/claude/goal-tracker-update-request.md +++ b/prompt-template/claude/goal-tracker-update-request.md @@ -1,11 +1,12 @@ -**If Goal Tracker needs updates**, include this section in your summary: +**Optional fallback**: if you could not safely update the mutable section of `goal-tracker.md` directly, include this section in your summary: ```markdown ## Goal Tracker Update Request ### Requested Changes: - [E.g., "Mark Task X as completed with evidence: tests pass"] -- [E.g., "Add to Open Issues: discovered Y needs addressing"] +- [E.g., "Add to Blocking Side Issues: bug Y blocks AC-2"] +- [E.g., "Add to Queued Side Issues: cleanup Z is non-blocking"] - [E.g., "Plan Evolution: changed approach from A to B because..."] - [E.g., "Defer Task Z because... (impact on AC: none/minimal)"] @@ -13,4 +14,4 @@ [Explain why these changes are needed and how they serve the Ultimate Goal] ``` -Codex will review your request and update the Goal Tracker if justified. +Codex will review your request and reconcile the Goal Tracker if justified. diff --git a/prompt-template/claude/methodology-analysis-prompt.md b/prompt-template/claude/methodology-analysis-prompt.md new file mode 100644 index 00000000..085153e5 --- /dev/null +++ b/prompt-template/claude/methodology-analysis-prompt.md @@ -0,0 +1,73 @@ +# Methodology Analysis Phase + +The RLCR loop has reached its exit point. + +**Exit reason**: {{EXIT_REASON}} - {{EXIT_REASON_DESCRIPTION}} +**Rounds completed**: {{CURRENT_ROUND}} of {{MAX_ITERATIONS}} + +Before the loop fully exits, please perform a methodology improvement analysis. This analysis helps improve the Humanize development methodology itself -- it is NOT about the project you just worked on. 
+ +## Instructions + +### 1. Spawn an Opus Agent for Sanitized Analysis + +Use the Agent tool with `model: "opus"` to spawn an analysis agent. Give it this task: + +**Agent prompt**: Read the development records in `{{LOOP_DIR}}`: +- All files matching `round-*-summary.md` +- All files matching `round-*-review-result.md` + +Analyze these records from a **pure methodology perspective** and write your findings to `{{LOOP_DIR}}/methodology-analysis-report.md`. + +**CRITICAL SANITIZATION RULES** - The report MUST NOT contain: +- File paths, directory paths, or module paths +- Function names, variable names, class names, or method names +- Branch names, commit hashes, or git identifiers +- Business domain terms, product names, or feature names +- Code snippets or code fragments of any kind +- Raw error messages or stack traces +- Project-specific URLs or endpoints +- Any information that could identify the specific project + +**Focus areas for analysis**: +- Iteration efficiency: Were rounds productive or did they repeat similar work? +- Feedback loop quality: Did reviewer feedback lead to meaningful improvements? +- Stagnation patterns: Were there signs of going in circles? +- Review effectiveness: Did reviews catch real issues or create false positives? +- Plan-to-execution alignment: Did execution follow the plan or drift? +- Round count vs. progress ratio: Was the number of rounds proportional to progress? +- Communication clarity: Were summaries and reviews clear and actionable? + +**Output format**: Write a structured report with methodology improvement suggestions. Each suggestion should describe a general pattern observed and a concrete improvement to the RLCR methodology. If no improvements are found, write a brief note saying the methodology worked well for this session. + +### 2. Read the Analysis Report + +After the agent completes, read `{{LOOP_DIR}}/methodology-analysis-report.md`. 
ALL subsequent user-facing content MUST be derived solely from this report -- do NOT reference raw development records directly. + +### 3. Handle Results + +**If no improvements found**: Briefly inform the user that the methodology analysis found no significant improvement suggestions. Then write a completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit. + +**If improvements found**: + +a) Report to the user: + - Brief summary of the exit reason ({{EXIT_REASON}}: {{EXIT_REASON_DESCRIPTION}}) + - Methodology improvement suggestions from the report + +b) Use `AskUserQuestion` to ask if the user would like to help improve Humanize by opening a GitHub issue with these suggestions. Emphasize: + - This is completely voluntary + - The content is fully sanitized (no project-specific information) + - It helps improve the methodology for everyone + +c) **If the user declines**: Thank them, write the completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md`, and exit. + +d) **If the user agrees**: + - Draft a GitHub issue title and body from the analysis report + - Show the draft via a second `AskUserQuestion` for the user to review and confirm + - If confirmed: run `gh issue create --repo PolyArch/humanize --title "..." --body "..."` + - If `gh` is not available, provide the title and body so the user can create the issue manually + - Write the completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit + +### 4. Completion Marker + +You MUST write meaningful content to `{{LOOP_DIR}}/methodology-analysis-done.md` before exiting. This file signals that the analysis phase is complete. A brief summary of what was done (e.g., "Analysis complete, no suggestions" or "Analysis complete, issue filed") is sufficient.
diff --git a/prompt-template/claude/next-round-prompt.md b/prompt-template/claude/next-round-prompt.md index b3aaff01..fd1b1cfe 100644 --- a/prompt-template/claude/next-round-prompt.md +++ b/prompt-template/claude/next-round-prompt.md @@ -9,8 +9,35 @@ This plan contains the full scope of work and requirements. Ensure your work ali --- -For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList) to track each item in order of importance. -You are strictly prohibited from only addressing the most important issues - you MUST create Tasks for ALL discovered issues and attempt to resolve each one. +## Round Re-anchor (REQUIRED FIRST STEP) + +Before writing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the most recent round summaries/reviews that led to this round +- Write the current round contract to @{{ROUND_CONTRACT_FILE}} + +Your round contract must contain: +- Exactly one **mainline objective** +- The 1-2 target ACs for this round +- Which issues are truly **blocking** that mainline objective +- Which issues are **queued** and explicitly out of scope +- Concrete success criteria for this round + +Do not start implementation until the round contract exists. + +## Task Lane Rules + +Use the Task system (TaskCreate, TaskUpdate, TaskList) with one required tag per task: +- `[mainline]` for plan-derived work that directly advances this round's objective +- `[blocking]` for issues that prevent the mainline objective from succeeding safely +- `[queued]` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- `[mainline]` work is the round's primary success condition +- `[blocking]` work is allowed only when it truly blocks the mainline objective +- `[queued]` work must be documented but must NOT replace the round objective +- If a new bug does not block the current objective, tag it `[queued]` and keep moving on mainline work Before executing each task in this round: 1. 
Read @{{BITLESSON_FILE}} @@ -24,13 +51,25 @@ Below is Codex's review result: <!-- CODEX's REVIEW RESULT END --> --- -## Goal Tracker Reference (READ-ONLY after Round 0) +## Goal Tracker Reference Before starting work, **read** @{{GOAL_TRACKER_FILE}} to understand: - The Ultimate Goal and Acceptance Criteria you're working toward - Which tasks are Active, Completed, or Deferred +- Which side issues are blocking vs queued - Any Plan Evolution that has occurred -- Open Issues that need attention +- The latest side-issue state that needs attention + +**IMPORTANT**: Keep the mutable section of `goal-tracker.md` up to date during the round. +Do NOT change the immutable section after Round 0. +If you cannot safely reconcile the tracker yourself, include an optional "Goal Tracker Update Request" section in your summary (see below). + +## Mainline Guardrails -**IMPORTANT**: You CANNOT directly modify goal-tracker.md after Round 0. -If you need to update the Goal Tracker, include a "Goal Tracker Update Request" section in your summary (see below). +- Keep the mainline objective from @{{ROUND_CONTRACT_FILE}} stable for this round +- Do not let queued issues take over the round +- If Codex reported several findings, classify them into: + - mainline gaps + - blocking side issues + - queued side issues +- Only mainline gaps and blocking side issues should drive the next code changes diff --git a/prompt-template/claude/post-alignment-action-items.md b/prompt-template/claude/post-alignment-action-items.md index 28611ec0..c78e95d0 100644 --- a/prompt-template/claude/post-alignment-action-items.md +++ b/prompt-template/claude/post-alignment-action-items.md @@ -5,3 +5,4 @@ This round follows a Full Goal Alignment Check. Pay special attention to: - **Forgotten Items**: Codex may have identified tasks that were being ignored. Address them. - **AC Status**: If any Acceptance Criteria were marked NOT MET, prioritize work toward those. 
- **Deferred Items**: If any deferrals were flagged as unjustified, un-defer them now. +- **Queued Issues**: Keep non-blocking follow-up work queued unless it now clearly blocks mainline progress. diff --git a/prompt-template/claude/review-phase-prompt.md b/prompt-template/claude/review-phase-prompt.md index 158ca0f0..e180e418 100644 --- a/prompt-template/claude/review-phase-prompt.md +++ b/prompt-template/claude/review-phase-prompt.md @@ -2,14 +2,39 @@ You are in the **Review Phase**. Codex has performed a code review and found issues that need to be addressed. +## Required Re-anchor + +Before touching code: +- Re-read the original plan at @{{PLAN_FILE}} +- Re-read the goal tracker at @{{GOAL_TRACKER_FILE}} +- Refresh the current round contract at @{{ROUND_CONTRACT_FILE}} + +The round contract must preserve a single mainline objective. Code review findings do NOT automatically become the new round objective. + ## Review Results {{REVIEW_CONTENT}} +## Issue Classification + +Classify each review finding before acting on it: +- **blocking side issue**: prevents the current mainline objective from succeeding safely or prevents review acceptance +- **queued side issue**: valid follow-up, but does not block the current round objective + +Queued issues may be documented, but they must NOT take over the round. + +## Task Rules + +Every task must use one lane tag: +- `[blocking]` for review findings that must be fixed now +- `[queued]` for non-blocking follow-up work + +Do not create new `[mainline]` tasks in the review phase unless the review proves the previous mainline objective was incomplete. + ## Instructions -1. **Read `.humanize/bitlesson.md` and run `bitlesson-selector`** for each fix task before coding -2. **Address all issues** marked with `[P0-9]` severity markers +1. **Refresh the round contract** at `{{ROUND_CONTRACT_FILE}}` +2. **Address blocking issues first** and keep the mainline objective stable 3.
**Focus on fixes only** - do not add new features or make unrelated changes 4. **Commit your changes** after fixing the issues 5. **Write your summary** to: `{{SUMMARY_FILE}}` @@ -17,9 +42,13 @@ You are in the **Review Phase**. Codex has performed a code review and found iss ## Summary Template Your summary should include: -- Which issues were fixed -- How each issue was resolved +- The mainline objective for this round +- Which blocking issues were fixed +- Which issues were reclassified as queued follow-up +- How each fixed issue was resolved - Any issues that could not be resolved (with explanation) +- Confirmation that `goal-tracker.md` was updated if the blocking/queued issue lists changed +- A Goal Tracker Update Request only if tracker reconciliation still needs Codex help ## Important Notes diff --git a/prompt-template/codex/commit-history-section.md b/prompt-template/codex/commit-history-section.md new file mode 100644 index 00000000..48ca2e35 --- /dev/null +++ b/prompt-template/codex/commit-history-section.md @@ -0,0 +1,12 @@ +## Development History (Integral Context) + +Accumulated commits since loop start (oldest first): +``` +{{COMMIT_HISTORY}} +``` + +### Recent Round Files +Read these files before conducting your review to understand the trajectory of work: +{{RECENT_ROUND_FILES}} + +Use this history to identify patterns across rounds: recurring issues, stalled progress, or drift from the mainline objective. Weight recent rounds more heavily but watch for systemic trends in the full commit log. 
diff --git a/prompt-template/codex/full-alignment-review.md b/prompt-template/codex/full-alignment-review.md index d8ced81b..4367810e 100644 --- a/prompt-template/codex/full-alignment-review.md +++ b/prompt-template/codex/full-alignment-review.md @@ -16,6 +16,8 @@ You MUST read this plan file first to understand the full scope of work before c <!-- CLAUDE's WORK SUMMARY END --> --- +{{COMMIT_HISTORY_SECTION}} + ## Part 1: Goal Tracker Audit (MANDATORY) Read @{{GOAL_TRACKER_FILE}} and verify: @@ -47,16 +49,32 @@ Estimated remaining rounds: ? Critical blockers: [list if any] ``` -## Part 2: Implementation Review +## Part 2: Mainline Drift Audit (MANDATORY) + +Determine whether the recent rounds are still serving the original plan: +- Is the current round's mainline objective clear and singular? +- Has Claude been advancing mainline ACs, or mostly clearing side issues? +- Which findings are true **blocking side issues** versus merely **queued side issues**? + +Include a short drift summary: +``` +Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED +Blocking Side Issues: N +Queued Side Issues: N +``` + +The `Mainline Progress Verdict` line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. + +## Part 3: Implementation Review - Conduct a deep critical review of the implementation - Verify Claude's claims match reality - Identify any gaps, bugs, or incomplete work - Reference @{{DOCS_PATH}} for design documents -## Part 3: {{GOAL_TRACKER_UPDATE_SECTION}} +## Part 4: {{GOAL_TRACKER_UPDATE_SECTION}} -## Part 4: Progress Stagnation Check (MANDATORY for Full Alignment Rounds) +## Part 5: Progress Stagnation Check (MANDATORY for Full Alignment Rounds) To implement the original plan at @{{PLAN_FILE}}, we have completed **{{COMPLETED_ITERATIONS}} iterations** (Round 0 to Round {{CURRENT_ROUND}}). 
@@ -83,10 +101,13 @@ The project's `.humanize/rlcr/{{LOOP_TIMESTAMP}}/` directory contains the histor **If development is stagnating**, write **STOP** (as a single word on its own line) as the last line of your review output @{{REVIEW_RESULT_FILE}} instead of COMPLETE. -## Part 5: Output Requirements +## Part 6: Output Requirements - If issues found OR any AC is NOT MET (including deferred ACs), write your findings to @{{REVIEW_RESULT_FILE}} -- Include specific action items for Claude to address +- Include specific action items for Claude to address, classified into: + - Mainline Gaps + - Blocking Side Issues + - Queued Side Issues - **If development is stagnating** (see Part 5), write "STOP" as the last line - **CRITICAL**: Only write "COMPLETE" as the last line if ALL ACs from the original plan are FULLY MET with no deferrals - DEFERRED items are considered INCOMPLETE - do NOT output COMPLETE if any AC is deferred diff --git a/prompt-template/codex/goal-tracker-update-section.md b/prompt-template/codex/goal-tracker-update-section.md index 77cbedca..fb312db8 100644 --- a/prompt-template/codex/goal-tracker-update-section.md +++ b/prompt-template/codex/goal-tracker-update-section.md @@ -1,17 +1,18 @@ ## Goal Tracker Update Requests (YOUR RESPONSIBILITY) -**Important**: Claude cannot directly modify `goal-tracker.md` after Round 0. If Claude's summary contains a "Goal Tracker Update Request" section, YOU must: +Claude should normally keep the **mutable section** of `goal-tracker.md` up to date directly. If Claude's summary contains a "Goal Tracker Update Request" section, or if you detect tracker drift during review, YOU must: -1. **Evaluate the request**: Is the change justified? Does it serve the Ultimate Goal? -2. **If approved**: Update @{{GOAL_TRACKER_FILE}} yourself with the requested changes: +1. **Evaluate the tracker state**: Is the mutable section still aligned with the Ultimate Goal and current AC progress? +2. 
**If correction is needed**: Update @{{GOAL_TRACKER_FILE}} yourself with the requested changes: - Move tasks between Active/Completed/Deferred sections as appropriate - Add entries to "Plan Evolution Log" with round number and justification - - Add new issues to "Open Issues" if discovered + - Add new issues to "Blocking Side Issues" or "Queued Side Issues" as appropriate - **NEVER modify the IMMUTABLE SECTION** (Ultimate Goal and Acceptance Criteria) -3. **If rejected**: Include in your review why the request was rejected +3. **If you reject a requested tracker change**: Include in your review why it was rejected Common update requests you should handle: - Task completion: Move from "Active Tasks" to "Completed and Verified" -- New issues: Add to "Open Issues" table +- New blocking issues: Add to "Blocking Side Issues" +- New queued issues: Add to "Queued Side Issues" - Plan changes: Add to "Plan Evolution Log" with your assessment - Deferrals: Only allow with strong justification; add to "Explicitly Deferred" diff --git a/prompt-template/codex/regular-review.md b/prompt-template/codex/regular-review.md index 6d0a8671..4d4a8680 100644 --- a/prompt-template/codex/regular-review.md +++ b/prompt-template/codex/regular-review.md @@ -17,6 +17,8 @@ Below is Claude's summary of the work completed: <!-- CLAUDE's WORK SUMMARY END --> --- +{{COMMIT_HISTORY_SECTION}} + ## Part 1: Implementation Review - Your task is to conduct a deep critical review, focusing on finding implementation issues and identifying gaps between "plan-design" and actual implementation. 
@@ -44,11 +46,28 @@ Include a brief Goal Alignment Summary in your review: ACs: X/Y addressed | Forgotten items: N | Unjustified deferrals: N ``` -## Part 3: {{GOAL_TRACKER_UPDATE_SECTION}} +## Part 3: Required Finding Classification + +You MUST classify your findings into these lanes: +- **Mainline Gaps**: plan-derived work or AC progress that is missing, incomplete, or regressing +- **Blocking Side Issues**: bugs or implementation issues that block the current mainline objective from succeeding safely +- **Queued Side Issues**: valid non-blocking follow-up issues that should be documented but must NOT take over the next round + +Also include a one-line verdict: +``` +Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED +``` + +This verdict line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. + +If Claude mostly worked on queued side issues and failed to advance the mainline, say so explicitly. + +## Part 4: {{GOAL_TRACKER_UPDATE_SECTION}} -## Part 4: Output Requirements +## Part 5: Output Requirements - In short, your review comments can include: problems/findings/blockers; claims that don't match reality; implementation plans for deferred work (to be implemented now); implementation plans for unfinished work; goal alignment issues. +- Your output should be structured so Claude can tell which items are mainline gaps, blocking side issues, and queued side issues. - If after your investigation the actual situation does not match what Claude claims to have completed, or there is pending work to be done, output your review comments to @{{REVIEW_RESULT_FILE}}. 
- **CRITICAL**: Only output "COMPLETE" as the last line if ALL tasks from the original plan are FULLY completed with no deferrals - DEFERRED items are considered INCOMPLETE - do NOT output COMPLETE if any task is deferred diff --git a/prompt-template/idea/gen-idea-template.md b/prompt-template/idea/gen-idea-template.md new file mode 100644 index 00000000..5feeb8a3 --- /dev/null +++ b/prompt-template/idea/gen-idea-template.md @@ -0,0 +1,31 @@ +# <TITLE> + +## Original Idea + +<ORIGINAL_IDEA> + +## Primary Direction: <PRIMARY_NAME> + +### Rationale + +<PRIMARY_RATIONALE> + +### Approach Summary + +<PRIMARY_APPROACH_SUMMARY> + +### Objective Evidence + +<PRIMARY_OBJECTIVE_EVIDENCE> + +### Known Risks + +<PRIMARY_KNOWN_RISKS> + +## Alternative Directions Considered + +<ALTERNATIVES> + +## Synthesis Notes + +<SYNTHESIS_NOTES> diff --git a/prompt-template/pr-loop/codex-goal-tracker-update.md b/prompt-template/pr-loop/codex-goal-tracker-update.md deleted file mode 100644 index 65ba5379..00000000 --- a/prompt-template/pr-loop/codex-goal-tracker-update.md +++ /dev/null @@ -1,64 +0,0 @@ -## Goal Tracker Update Instructions - -After completing your analysis, update the goal tracker file at `{{GOAL_TRACKER_FILE}}`: - -### Required Updates - -1. **Add row to Issue Summary table:** - - Add a new row for this round with your review results - - Format: `| {{NEXT_ROUND}} | <reviewer_name> | <issues_found> | <issues_resolved> | <status> |` - - Status should be: "Issues Found", "All Resolved", or "Approved" - -2. **Update Total Statistics section:** - - Increment `Total Issues Found` by number of new issues discovered - - Increment `Total Issues Resolved` by number of issues you verified as fixed - - Update `Remaining` to be (Total Found - Total Resolved) - -3. 
**Add Issue Log entry for this round:** - - Create heading: `### Round {{NEXT_ROUND}}` - - List each issue or approval with details - - Include reviewer name and brief description - -### Example Goal Tracker Update - -If bot "claude" reported 2 new issues and "codex" found 0 issues (approved): - -```markdown -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | -| 1 | claude | 2 | 0 | Issues Found | -| 1 | codex | 0 | 0 | Approved | - -## Total Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 0 -- Remaining: 2 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -Started: 2026-01-18T10:00:00Z -Startup Case: 1 - -### Round 1 -**claude** found 2 issues: -1. Missing error handling in auth.ts -2. Test coverage below 80% - -**codex** approved - no issues found. -``` - -### Important Rules - -- Keep the file structure intact -- Use proper markdown table formatting -- Only update the sections mentioned above (Issue Summary, Total Statistics, Issue Log) -- Do not modify the header sections (PR Information, Ultimate Goal) -- Add to existing tables, do not replace them -- Each reviewer gets a separate row in Issue Summary diff --git a/prompt-template/pr-loop/critical-requirements-has-comments.md b/prompt-template/pr-loop/critical-requirements-has-comments.md deleted file mode 100644 index 4855a306..00000000 --- a/prompt-template/pr-loop/critical-requirements-has-comments.md +++ /dev/null @@ -1,24 +0,0 @@ - -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. COMMIT and PUSH your changes: - - Create a commit with descriptive message - - Push to the remote repository - -2. Comment on the PR to trigger re-review: - gh pr comment {{PR_NUMBER}} --body "{{BOT_MENTION_STRING}} please review" - -3. 
Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should include: - - Issues addressed - - Files modified - - Tests added (if any) - -The Stop Hook will then poll for bot reviews. -=========================================== diff --git a/prompt-template/pr-loop/critical-requirements-no-comments.md b/prompt-template/pr-loop/critical-requirements-no-comments.md deleted file mode 100644 index 1b043501..00000000 --- a/prompt-template/pr-loop/critical-requirements-no-comments.md +++ /dev/null @@ -1,21 +0,0 @@ - -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should note: - - This is Round 0 awaiting initial bot reviews - - No issues to address yet - -2. Try to exit - the Stop Hook will poll for bot reviews - -DO NOT comment on the PR to trigger review - the bots will -review automatically since this is a new PR. - -The Stop Hook will poll for bot reviews. -=========================================== diff --git a/prompt-template/pr-loop/goal-tracker-initial.md b/prompt-template/pr-loop/goal-tracker-initial.md deleted file mode 100644 index d95e9936..00000000 --- a/prompt-template/pr-loop/goal-tracker-initial.md +++ /dev/null @@ -1,33 +0,0 @@ -# PR Review Goal Tracker - -## PR Information - -- **PR Number:** #{{PR_NUMBER}} -- **Branch:** {{START_BRANCH}} -- **Started:** {{STARTED_AT}} -- **Monitored Bots:** {{ACTIVE_BOTS_DISPLAY}} -- **Startup Case:** {{STARTUP_CASE}} - -## Ultimate Goal - -Get all monitored bot reviewers ({{ACTIVE_BOTS_DISPLAY}}) to approve this PR. 
- -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -Started: {{STARTED_AT}} -Startup Case: {{STARTUP_CASE}} diff --git a/prompt-template/pr-loop/round-0-header.md b/prompt-template/pr-loop/round-0-header.md deleted file mode 100644 index e694ce41..00000000 --- a/prompt-template/pr-loop/round-0-header.md +++ /dev/null @@ -1,15 +0,0 @@ -Read and execute below with ultrathink - -## PR Review Loop (Round 0) - -You are in a PR review loop monitoring feedback from remote review bots. - -**PR Information:** -- PR Number: #{{PR_NUMBER}} -- Branch: {{START_BRANCH}} -- Active Bots: {{ACTIVE_BOTS_DISPLAY}} - -## Review Comments - -The following comments have been fetched from the PR: - diff --git a/prompt-template/pr-loop/round-0-task-has-comments.md b/prompt-template/pr-loop/round-0-task-has-comments.md deleted file mode 100644 index 37f3e9f1..00000000 --- a/prompt-template/pr-loop/round-0-task-has-comments.md +++ /dev/null @@ -1,43 +0,0 @@ - ---- - -## Your Task - -1. **Analyze the comments above**, prioritizing: - - Human comments first (they take precedence) - - Bot comments (newest first) - -2. **Fix any issues** identified by the reviewers: - - Read the relevant code files - - Make necessary changes - - Create appropriate tests if needed - -3. **After fixing issues**: - - Commit your changes with a descriptive message - - Push to the remote repository - - Comment on the PR to trigger re-review: - ```bash - gh pr comment {{PR_NUMBER}} --body "{{BOT_MENTION_STRING}} please review the latest changes" - ``` - -4. **Write your resolution summary** to: @{{RESOLVE_PATH}} - - List what issues were addressed - - Files modified - - Tests added (if any) - ---- - -## Important Rules - -1. 
**Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -2. **Always push changes**: Your fixes must be pushed for bots to review them -3. **Use the correct comment format**: Tag the bots to trigger their reviews -4. **Be thorough**: Address all valid concerns from the reviewers - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for new bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues remain, you will receive feedback and continue -4. If all bots approve, the loop ends diff --git a/prompt-template/pr-loop/round-0-task-no-comments.md b/prompt-template/pr-loop/round-0-task-no-comments.md deleted file mode 100644 index 6ed6d9a2..00000000 --- a/prompt-template/pr-loop/round-0-task-no-comments.md +++ /dev/null @@ -1,30 +0,0 @@ - ---- - -## Your Task - -This PR has no review comments yet. The monitored bots ({{ACTIVE_BOTS_DISPLAY}}) will automatically review the PR - you do NOT need to comment to trigger the first review. - -1. **Wait for automatic bot reviews**: - - Simply write your summary and try to exit - - The Stop Hook will poll for the first bot reviews - -2. **Write your initial summary** to: @{{RESOLVE_PATH}} - - Note that this is Round 0 awaiting initial bot reviews - - No issues to address yet - ---- - -## Important Rules - -1. **Do not comment to trigger review**: First reviews are automatic -2. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -3. **Trust the process**: The Stop Hook manages polling and Codex validation - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues are found, you will receive feedback and continue -4. 
If all bots approve, the loop ends diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index 9c0a734c..fee439a8 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Ask Codex - One-shot consultation with Codex # @@ -55,7 +55,7 @@ USAGE: OPTIONS: --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, fallback gpt-5.4:high) + Codex model and reasoning effort (default from config, fallback gpt-5.5:high) --codex-timeout <SECONDS> Timeout for the Codex query in seconds (default: 3600) -h, --help Show this help message @@ -68,7 +68,7 @@ DESCRIPTION: EXAMPLES: /humanize:ask-codex How should I structure the authentication module? - /humanize:ask-codex --codex-model gpt-5.4:high What are the performance bottlenecks? + /humanize:ask-codex --codex-model gpt-5.5:high What are the performance bottlenecks? /humanize:ask-codex --codex-timeout 300 Review the error handling in src/api/ ENVIRONMENT: @@ -189,11 +189,11 @@ fi # Detect Project Root # ======================================== -if git rev-parse --show-toplevel &>/dev/null; then - PROJECT_ROOT=$(git rev-parse --show-toplevel) -else - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -fi +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." 
>&2 + exit 1 +} # ======================================== # Create Storage Directories @@ -234,6 +234,7 @@ $QUESTION - Effort: $CODEX_EFFORT - Timeout: ${CODEX_TIMEOUT}s - Timestamp: $TIMESTAMP +- Tool: codex EOF # ======================================== @@ -317,6 +318,7 @@ if [[ $CODEX_EXIT_CODE -eq 124 ]]; then # Save metadata even on timeout cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -343,6 +345,7 @@ if [[ $CODEX_EXIT_CODE -ne 0 ]]; then # Save metadata cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -368,6 +371,7 @@ if [[ ! -s "$CODEX_STDOUT_FILE" ]]; then cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -390,6 +394,7 @@ cp "$CODEX_STDOUT_FILE" "$SKILL_DIR/output.md" # Save metadata cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT diff --git a/scripts/ask-gemini.sh b/scripts/ask-gemini.sh new file mode 100755 index 00000000..890260a7 --- /dev/null +++ b/scripts/ask-gemini.sh @@ -0,0 +1,388 @@ +#!/usr/bin/env bash +# +# Ask Gemini - One-shot consultation with Gemini CLI +# +# Sends a question or task to gemini in non-interactive mode and returns +# the response. Gemini is always instructed to leverage Google Search +# for deep web research. +# +# Usage: +# ask-gemini.sh [--gemini-model MODEL] [--gemini-timeout SECONDS] [question...] 
+# +# Output: +# stdout: Gemini's response (for Claude to read) +# stderr: Status/debug info (model, log paths) +# +# Storage: +# Project-local: .humanize/skill/<unique-id>/{input,output,metadata}.md +# Cache: ~/.cache/humanize/<sanitized-path>/skill-<unique-id>/gemini-run.{cmd,out,log} +# + +set -euo pipefail + +# ======================================== +# Source Shared Libraries +# ======================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + +# Source portable timeout wrapper +source "$SCRIPT_DIR/portable-timeout.sh" + +# Shared project-root resolver (CLAUDE_PROJECT_DIR -> git toplevel, realpath-canonical) +source "$SCRIPT_DIR/../hooks/lib/project-root.sh" + +# ======================================== +# Default Configuration +# ======================================== + +DEFAULT_GEMINI_MODEL="gemini-3.1-pro-preview" +DEFAULT_ASK_GEMINI_TIMEOUT=3600 + +GEMINI_MODEL="$DEFAULT_GEMINI_MODEL" +GEMINI_TIMEOUT="$DEFAULT_ASK_GEMINI_TIMEOUT" + +# ======================================== +# Help +# ======================================== + +show_help() { + cat << 'HELP_EOF' +ask-gemini - One-shot deep-research consultation with Gemini + +USAGE: + /humanize:ask-gemini [OPTIONS] <question or task> + +OPTIONS: + --gemini-model <MODEL> + Gemini model name (default: gemini-3.1-pro-preview) + --gemini-timeout <SECONDS> + Timeout for the Gemini query in seconds (default: 3600) + -h, --help Show this help message + +DESCRIPTION: + Sends a one-shot question or task to the Gemini CLI in non-interactive + mode (-p). The prompt is augmented with an instruction to perform web + research via Google Search, making this ideal for deep-research tasks + that benefit from up-to-date internet information. + + The response is saved to .humanize/skill/<unique-id>/output.md for reference. + +EXAMPLES: + /humanize:ask-gemini What are the latest best practices for Rust error handling? 
+ /humanize:ask-gemini --gemini-model gemini-2.5-pro Review recent CVEs for OpenSSL 3.x + /humanize:ask-gemini --gemini-timeout 600 Compare React Server Components vs Astro Islands + +ENVIRONMENT: + HUMANIZE_GEMINI_YOLO + Set to "true" or "1" to auto-approve all Gemini tool calls (--yolo). + Default behaviour uses --sandbox mode. +HELP_EOF + exit 0 +} + +# ======================================== +# Parse Arguments +# ======================================== + +QUESTION_PARTS=() +OPTIONS_DONE=false + +while [[ $# -gt 0 ]]; do + if [[ "$OPTIONS_DONE" == "true" ]]; then + QUESTION_PARTS+=("$1") + shift + continue + fi + case $1 in + -h|--help) + show_help + ;; + --) + OPTIONS_DONE=true + shift + ;; + --gemini-model) + if [[ -z "${2:-}" ]]; then + echo "Error: --gemini-model requires a MODEL argument" >&2 + exit 1 + fi + GEMINI_MODEL="$2" + shift 2 + ;; + --gemini-timeout) + if [[ -z "${2:-}" ]]; then + echo "Error: --gemini-timeout requires a number argument (seconds)" >&2 + exit 1 + fi + if ! [[ "$2" =~ ^[0-9]+$ ]]; then + echo "Error: --gemini-timeout must be a positive integer (seconds), got: $2" >&2 + exit 1 + fi + GEMINI_TIMEOUT="$2" + shift 2 + ;; + -*) + echo "Error: Unknown option: $1" >&2 + echo "Use --help for usage information" >&2 + exit 1 + ;; + *) + QUESTION_PARTS+=("$1") + OPTIONS_DONE=true + shift + ;; + esac +done + +# Join question parts into a single string +QUESTION="${QUESTION_PARTS[*]}" + +# ======================================== +# Validate Prerequisites +# ======================================== + +if ! 
command -v gemini &>/dev/null; then + echo "Error: 'gemini' command is not installed or not in PATH" >&2 + echo "" >&2 + echo "Please install Gemini CLI: npm install -g @google/gemini-cli or https://github.com/google-gemini/gemini-cli" >&2 + echo "Then retry: /humanize:ask-gemini <your question>" >&2 + exit 1 +fi + +if [[ -z "$QUESTION" ]]; then + echo "Error: No question or task provided" >&2 + echo "" >&2 + echo "Usage: /humanize:ask-gemini [OPTIONS] <question or task>" >&2 + echo "" >&2 + echo "For help: /humanize:ask-gemini --help" >&2 + exit 1 +fi + +# Validate model name for safety (alphanumeric, hyphen, underscore, dot) +if [[ ! "$GEMINI_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "Error: Gemini model contains invalid characters" >&2 + echo " Model: $GEMINI_MODEL" >&2 + echo " Only alphanumeric, hyphen, underscore, dot allowed" >&2 + exit 1 +fi + +# ======================================== +# Detect Project Root +# ======================================== + +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 1 +} + +# ======================================== +# Create Storage Directories +# ======================================== + +TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) +UNIQUE_ID="${TIMESTAMP}-$$-$(head -c 4 /dev/urandom | od -An -tx1 | tr -d ' \n')" + +# Project-local storage: .humanize/skill/<unique-id>/ +SKILL_DIR="$PROJECT_ROOT/.humanize/skill/$UNIQUE_ID" +mkdir -p "$SKILL_DIR" + +# Cache storage: ~/.cache/humanize/<sanitized-path>/skill-<unique-id>/ +SANITIZED_PROJECT_PATH=$(echo "$PROJECT_ROOT" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') +CACHE_BASE="${XDG_CACHE_HOME:-$HOME/.cache}" +CACHE_DIR="$CACHE_BASE/humanize/$SANITIZED_PROJECT_PATH/skill-$UNIQUE_ID" +if ! 
mkdir -p "$CACHE_DIR" 2>/dev/null; then + CACHE_DIR="$SKILL_DIR/cache" + mkdir -p "$CACHE_DIR" + echo "ask-gemini: warning: home cache not writable, using $CACHE_DIR" >&2 +fi + +# ======================================== +# Save Input +# ======================================== + +cat > "$SKILL_DIR/input.md" << EOF +# Ask Gemini Input + +## Question + +$QUESTION + +## Configuration + +- Model: $GEMINI_MODEL +- Timeout: ${GEMINI_TIMEOUT}s +- Timestamp: $TIMESTAMP +- Tool: gemini +EOF + +# ======================================== +# Build Gemini Command +# ======================================== + +GEMINI_ARGS=("-m" "$GEMINI_MODEL") + +# Determine approval mode +if [[ "${HUMANIZE_GEMINI_YOLO:-}" == "true" ]] || [[ "${HUMANIZE_GEMINI_YOLO:-}" == "1" ]]; then + GEMINI_ARGS+=("--yolo") +else + GEMINI_ARGS+=("--sandbox") +fi + +# Use text output format for clean stdout +GEMINI_ARGS+=("-o" "text") + +# Build the augmented prompt with web-search instruction +AUGMENTED_PROMPT="You MUST use Google Search to find the most up-to-date and accurate information before answering. Perform thorough web research. Cite sources where possible. 
+ +--- + +$QUESTION" + +# ======================================== +# Save Debug Command +# ======================================== + +GEMINI_CMD_FILE="$CACHE_DIR/gemini-run.cmd" +GEMINI_STDOUT_FILE="$CACHE_DIR/gemini-run.out" +GEMINI_STDERR_FILE="$CACHE_DIR/gemini-run.log" + +{ + echo "# Gemini ask-gemini invocation debug info" + echo "# Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "# Working directory: $PROJECT_ROOT" + echo "# Timeout: $GEMINI_TIMEOUT seconds" + echo "" + echo "gemini ${GEMINI_ARGS[*]} -p \"<prompt>\"" + echo "" + echo "# Prompt content:" + echo "$AUGMENTED_PROMPT" +} > "$GEMINI_CMD_FILE" + +# ======================================== +# Run Gemini +# ======================================== + +echo "ask-gemini: model=$GEMINI_MODEL timeout=${GEMINI_TIMEOUT}s" >&2 +echo "ask-gemini: cache=$CACHE_DIR" >&2 +echo "ask-gemini: running gemini -p ..." >&2 + +# Portable epoch-to-ISO8601 formatter +epoch_to_iso() { + local epoch="$1" + date -u -d "@$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || + date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || + echo "unknown" +} + +START_TIME=$(date +%s) + +GEMINI_EXIT_CODE=0 +run_with_timeout "$GEMINI_TIMEOUT" gemini "${GEMINI_ARGS[@]}" -p "$AUGMENTED_PROMPT" \ + > "$GEMINI_STDOUT_FILE" 2> "$GEMINI_STDERR_FILE" || GEMINI_EXIT_CODE=$? 
+ +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) + +echo "ask-gemini: exit_code=$GEMINI_EXIT_CODE duration=${DURATION}s" >&2 + +# ======================================== +# Handle Results +# ======================================== + +if [[ $GEMINI_EXIT_CODE -eq 124 ]]; then + echo "Error: Gemini timed out after ${GEMINI_TIMEOUT} seconds" >&2 + echo "" >&2 + echo "Try increasing the timeout:" >&2 + echo " /humanize:ask-gemini --gemini-timeout $((GEMINI_TIMEOUT * 2)) <your question>" >&2 + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 124 +duration: ${DURATION}s +status: timeout +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit 124 +fi + +if [[ $GEMINI_EXIT_CODE -ne 0 ]]; then + echo "Error: Gemini exited with code $GEMINI_EXIT_CODE" >&2 + if [[ -s "$GEMINI_STDERR_FILE" ]]; then + echo "" >&2 + echo "Gemini stderr (last 20 lines):" >&2 + tail -20 "$GEMINI_STDERR_FILE" >&2 + fi + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: $GEMINI_EXIT_CODE +duration: ${DURATION}s +status: error +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit "$GEMINI_EXIT_CODE" +fi + +if [[ ! 
-s "$GEMINI_STDOUT_FILE" ]]; then + echo "Error: Gemini returned empty response" >&2 + if [[ -s "$GEMINI_STDERR_FILE" ]]; then + echo "" >&2 + echo "Gemini stderr (last 20 lines):" >&2 + tail -20 "$GEMINI_STDERR_FILE" >&2 + fi + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 0 +duration: ${DURATION}s +status: empty_response +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit 1 +fi + +# ======================================== +# Save Output and Metadata +# ======================================== + +cp "$GEMINI_STDOUT_FILE" "$SKILL_DIR/output.md" + +cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 0 +duration: ${DURATION}s +status: success +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + +echo "ask-gemini: response saved to $SKILL_DIR/output.md" >&2 + +# ======================================== +# Output Response +# ======================================== + +cat "$GEMINI_STDOUT_FILE" diff --git a/scripts/bitlesson-init.sh b/scripts/bitlesson-init.sh index 329e47df..5e454ded 100755 --- a/scripts/bitlesson-init.sh +++ b/scripts/bitlesson-init.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index 9399b06c..fd19a445 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail @@ -9,12 +9,21 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/lib/config-loader.sh" source "$SCRIPT_DIR/lib/model-router.sh" +source "$SCRIPT_DIR/../hooks/lib/project-root.sh" PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null || pwd)}" +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 1 +} MERGED_CONFIG="$(load_merged_config "$PLUGIN_ROOT" "$PROJECT_ROOT")" BITLESSON_MODEL="$(get_config_value "$MERGED_CONFIG" "bitlesson_model")" BITLESSON_MODEL="${BITLESSON_MODEL:-haiku}" +CODEX_FALLBACK_MODEL="$(get_config_value "$MERGED_CONFIG" "codex_model")" +CODEX_FALLBACK_MODEL="${CODEX_FALLBACK_MODEL:-$DEFAULT_CODEX_MODEL}" +PROVIDER_MODE="$(get_config_value "$MERGED_CONFIG" "provider_mode")" +PROVIDER_MODE="${PROVIDER_MODE:-auto}" # Source portable timeout wrapper source "$SCRIPT_DIR/portable-timeout.sh" @@ -82,12 +91,34 @@ if [[ -z "$BITLESSON_FILE" ]]; then exit 1 fi +if [[ ! -f "$BITLESSON_FILE" ]]; then + echo "Error: BitLesson file not found: $BITLESSON_FILE" >&2 + exit 1 +fi + +BITLESSON_CONTENT="$(cat "$BITLESSON_FILE")" +if [[ -z "$(printf '%s' "$BITLESSON_CONTENT" | tr -d ' \t\n\r')" ]]; then + echo "Error: BitLesson file is empty (whitespace only): $BITLESSON_FILE" >&2 + exit 1 +fi + +if ! printf '%s\n' "$BITLESSON_CONTENT" | grep -Eq '^[[:space:]]*##[[:space:]]+Lesson:'; then + printf 'LESSON_IDS: NONE\n' + printf 'RATIONALE: The BitLesson file has no recorded lessons yet.\n' + exit 0 +fi + # ======================================== # Determine Provider from BITLESSON_MODEL # ======================================== BITLESSON_PROVIDER="$(detect_provider "$BITLESSON_MODEL")" +if [[ "$PROVIDER_MODE" == "codex-only" ]] && [[ "$BITLESSON_PROVIDER" == "claude" ]]; then + BITLESSON_MODEL="$CODEX_FALLBACK_MODEL" + BITLESSON_PROVIDER="codex" +fi + # ======================================== # Conditional Dependency Check (with fallback) # ======================================== @@ -99,17 +130,6 @@ if ! 
check_provider_dependency "$BITLESSON_PROVIDER" 2>/dev/null; then check_provider_dependency "$BITLESSON_PROVIDER" fi -if [[ ! -f "$BITLESSON_FILE" ]]; then - echo "Error: BitLesson file not found: $BITLESSON_FILE" >&2 - exit 1 -fi - -BITLESSON_CONTENT="$(cat "$BITLESSON_FILE")" -if [[ -z "$(printf '%s' "$BITLESSON_CONTENT" | tr -d ' \t\n\r')" ]]; then - echo "Error: BitLesson file is empty (whitespace only): $BITLESSON_FILE" >&2 - exit 1 -fi - # ======================================== # Detect Project Root (for -C) # ======================================== @@ -148,6 +168,7 @@ $BITLESSON_CONTENT 1. Match only lessons that are directly relevant to the sub-task scope and failure mode. 2. Prefer precision over recall: do not include weakly related lessons. 3. If nothing is relevant, return \`NONE\`. +4. Use only the information in this prompt. Do not use tools, shell commands, browser access, MCP servers, or repository inspection. ## Output Format (Stable) @@ -164,21 +185,44 @@ EOF SELECTOR_TIMEOUT=120 -CODEX_EXIT_CODE=0 -if [[ "$BITLESSON_PROVIDER" == "codex" ]]; then - CODEX_EXEC_ARGS=("-m" "$BITLESSON_MODEL" "-c" "model_reasoning_effort=high") +run_selector() { + local provider="$1" + local model="$2" + + if [[ "$provider" == "codex" ]]; then + local codex_exec_args=() + # Probe whether the installed Codex CLI supports --disable flag + if codex --help 2>&1 | grep -q -- '--disable'; then + codex_exec_args+=("--disable" "codex_hooks") + fi + # Probe for --skip-git-repo-check and --ephemeral support + if codex exec --help 2>&1 | grep -q -- '--skip-git-repo-check'; then + codex_exec_args+=("--skip-git-repo-check") + fi + if codex exec --help 2>&1 | grep -q -- '--ephemeral'; then + codex_exec_args+=("--ephemeral") + fi + codex_exec_args+=( + "-s" "read-only" + "-m" "$model" + "-c" "model_reasoning_effort=low" + "-C" "$CODEX_PROJECT_ROOT" + ) + printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" codex exec "${codex_exec_args[@]}" - + return $? 
+ fi - # Determine automation flag based on environment variable (same as ask-codex.sh) - CODEX_AUTO_FLAG="--full-auto" - if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "1" ]]; then - CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" + if [[ "$provider" == "claude" ]]; then + printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" claude --print --model "$model" - + return $? fi - CODEX_EXEC_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$CODEX_PROJECT_ROOT") - RAW_OUTPUT="$(printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" -)" || CODEX_EXIT_CODE=$? -elif [[ "$BITLESSON_PROVIDER" == "claude" ]]; then - RAW_OUTPUT="$(printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" claude --print --model "$BITLESSON_MODEL" -)" || CODEX_EXIT_CODE=$? -fi + echo "Error: Unsupported BitLesson provider '$provider'" >&2 + return 1 +} + +CODEX_EXIT_CODE=0 +RAW_OUTPUT="$(run_selector "$BITLESSON_PROVIDER" "$BITLESSON_MODEL" 2>&1)" || CODEX_EXIT_CODE=$? 
if [[ $CODEX_EXIT_CODE -eq 124 ]]; then echo "Error: BitLesson selector timed out after ${SELECTOR_TIMEOUT} seconds" >&2 @@ -187,6 +231,7 @@ fi if [[ $CODEX_EXIT_CODE -ne 0 ]]; then echo "Error: BitLesson selector failed (exit code $CODEX_EXIT_CODE)" >&2 + printf '%s\n' "$RAW_OUTPUT" >&2 exit "$CODEX_EXIT_CODE" fi diff --git a/scripts/bitlesson-validate-delta.sh b/scripts/bitlesson-validate-delta.sh index ee786093..648303b0 100755 --- a/scripts/bitlesson-validate-delta.sh +++ b/scripts/bitlesson-validate-delta.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail usage() { diff --git a/scripts/cancel-pr-loop.sh b/scripts/cancel-pr-loop.sh deleted file mode 100755 index f01c606b..00000000 --- a/scripts/cancel-pr-loop.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/bin/bash -# -# Cancel script for cancel-pr-loop -# -# Cancels an active PR loop by creating a cancel signal file -# and renaming the state file to cancel-state.md. -# -# Usage: -# cancel-pr-loop.sh [--force] -# -# Exit codes: -# 0 - Successfully cancelled -# 1 - No active loop found -# 2 - Reserved for future use (e.g., confirmation required) -# 3 - Other error -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -FORCE="false" - -while [[ $# -gt 0 ]]; do - case $1 in - --force) - FORCE="true" - shift - ;; - -h|--help) - cat << 'HELP_EOF' -cancel-pr-loop.sh - Cancel active PR loop - -USAGE: - cancel-pr-loop.sh [OPTIONS] - -OPTIONS: - --force Force cancel (currently has no additional effect) - -h, --help Show this help message - -EXIT CODES: - 0 - Successfully cancelled - 1 - No active loop found - 3 - Other error - -DESCRIPTION: - Cancels the active PR loop by: - 1. Finding the most recent PR loop directory - 2. Creating a .cancel-requested signal file - 3. Renaming state.md to cancel-state.md - -NOTE: - This command only affects PR loops (.humanize/pr-loop/). - RLCR loops (.humanize/rlcr/) are not affected. 
-HELP_EOF - exit 0 - ;; - *) - echo "Unknown option: $1" >&2 - echo "Use --help for usage information" >&2 - exit 3 - ;; - esac -done - -# ======================================== -# Find Loop Directory -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Find newest loop directory (different from RLCR - uses pr-loop instead of rlcr) -LOOP_DIR=$(ls -1d "$LOOP_BASE_DIR"/*/ 2>/dev/null | sort -r | head -1) || true - -if [[ -z "$LOOP_DIR" ]]; then - echo "NO_LOOP" - echo "No active PR loop found." - exit 1 -fi - -# ======================================== -# Check Loop State -# ======================================== - -STATE_FILE="$LOOP_DIR/state.md" -CANCEL_SIGNAL="$LOOP_DIR/.cancel-requested" - -if [[ -f "$STATE_FILE" ]]; then - LOOP_STATE="ACTIVE" - ACTIVE_STATE_FILE="$STATE_FILE" -else - echo "NO_ACTIVE_LOOP" - echo "No active PR loop found. The loop directory exists but no active state file is present." 
- exit 1 -fi - -# ======================================== -# Extract Round Info -# ======================================== - -# Extract current_round and max_iterations from the state file -CURRENT_ROUND=$(grep -E '^current_round:' "$ACTIVE_STATE_FILE" | sed 's/^current_round:[[:space:]]*//' | tr -d ' ') -MAX_ITERATIONS=$(grep -E '^max_iterations:' "$ACTIVE_STATE_FILE" | sed 's/^max_iterations:[[:space:]]*//' | tr -d ' ') -PR_NUMBER=$(grep -E '^pr_number:' "$ACTIVE_STATE_FILE" | sed 's/^pr_number:[[:space:]]*//' | tr -d ' ') - -# Default values if not found -CURRENT_ROUND=${CURRENT_ROUND:-"?"} -MAX_ITERATIONS=${MAX_ITERATIONS:-"?"} -PR_NUMBER=${PR_NUMBER:-"?"} - -# ======================================== -# Perform Cancellation -# ======================================== - -# Create cancel signal file -touch "$CANCEL_SIGNAL" - -# Rename state file to cancel-state.md -mv "$ACTIVE_STATE_FILE" "$LOOP_DIR/cancel-state.md" - -# ======================================== -# Output Result -# ======================================== - -echo "CANCELLED" -echo "Cancelled PR loop for PR #$PR_NUMBER (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." -echo "State preserved as cancel-state.md" - -exit 0 diff --git a/scripts/cancel-rlcr-loop.sh b/scripts/cancel-rlcr-loop.sh index 907b051e..a835b875 100755 --- a/scripts/cancel-rlcr-loop.sh +++ b/scripts/cancel-rlcr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Cancel script for cancel-rlcr-loop # @@ -50,7 +50,7 @@ DESCRIPTION: Cancels the active RLCR loop by: 1. Finding the most recent loop directory 2. Creating a .cancel-requested signal file - 3. Renaming state.md or finalize-state.md to cancel-state.md + 3. 
Renaming state.md, methodology-analysis-state.md, or finalize-state.md to cancel-state.md HELP_EOF exit 0 ;; @@ -66,13 +66,17 @@ done # Find Loop Directory # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" - -# Source shared loop library for find_active_loop +# Source shared loop library for find_active_loop and resolve_project_root SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/../hooks/lib/loop-common.sh" +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine humanize project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 3 +} +LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" + # PRODUCT DECISION: Cancel operates globally (no session_id filtering). # # Cancel is invoked as a standalone Bash command via /cancel-rlcr-loop slash command. @@ -98,11 +102,15 @@ fi STATE_FILE="$LOOP_DIR/state.md" FINALIZE_STATE_FILE="$LOOP_DIR/finalize-state.md" +METHODOLOGY_ANALYSIS_STATE_FILE="$LOOP_DIR/methodology-analysis-state.md" CANCEL_SIGNAL="$LOOP_DIR/.cancel-requested" if [[ -f "$STATE_FILE" ]]; then LOOP_STATE="NORMAL_LOOP" ACTIVE_STATE_FILE="$STATE_FILE" +elif [[ -f "$METHODOLOGY_ANALYSIS_STATE_FILE" ]]; then + LOOP_STATE="METHODOLOGY_ANALYSIS_PHASE" + ACTIVE_STATE_FILE="$METHODOLOGY_ANALYSIS_STATE_FILE" elif [[ -f "$FINALIZE_STATE_FILE" ]]; then LOOP_STATE="FINALIZE_PHASE" ACTIVE_STATE_FILE="$FINALIZE_STATE_FILE" @@ -151,6 +159,9 @@ touch "$CANCEL_SIGNAL" # Clean up any pending session_id signal file (setup may not have completed) rm -f "$PROJECT_ROOT/.humanize/.pending-session-id" +# Clean up methodology analysis marker files if present +rm -f "$LOOP_DIR/.methodology-exit-reason" + # Rename state file to cancel-state.md mv "$ACTIVE_STATE_FILE" "$LOOP_DIR/cancel-state.md" @@ -162,6 +173,10 @@ if [[ "$LOOP_STATE" == "NORMAL_LOOP" ]]; then echo "CANCELLED" echo "Cancelled RLCR loop (was at round 
$CURRENT_ROUND of $MAX_ITERATIONS)." echo "State preserved as cancel-state.md" +elif [[ "$LOOP_STATE" == "METHODOLOGY_ANALYSIS_PHASE" ]]; then + echo "CANCELLED_METHODOLOGY_ANALYSIS" + echo "Cancelled RLCR loop during Methodology Analysis Phase (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." + echo "State preserved as cancel-state.md" else echo "CANCELLED_FINALIZE" echo "Cancelled RLCR loop during Finalize Phase (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." diff --git a/scripts/check-bot-reactions.sh b/scripts/check-bot-reactions.sh deleted file mode 100755 index 74a3372c..00000000 --- a/scripts/check-bot-reactions.sh +++ /dev/null @@ -1,308 +0,0 @@ -#!/bin/bash -# -# Check bot reactions on PR or comments -# -# Detects: -# - Codex +1 (thumbs-up) reaction on PR body (first round approval) -# - Claude eyes reaction on trigger comments (confirmation of receipt) -# -# Usage: -# check-bot-reactions.sh codex-thumbsup <pr_number> [--after <timestamp>] -# check-bot-reactions.sh claude-eyes <comment_id> [--retry <attempts>] [--delay <seconds>] -# -# Exit codes: -# 0 - Reaction found -# 1 - Reaction not found (or timeout after all retries) -# 2 - Error (API failure, missing arguments, etc.) 
- -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Timeout for gh operations -GH_TIMEOUT=30 - -# Default retry settings for claude eyes -DEFAULT_MAX_RETRIES=3 -DEFAULT_RETRY_DELAY=5 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# ======================================== -# Helper Functions -# ======================================== - -show_help() { - cat << 'EOF' -check-bot-reactions.sh - Detect bot reactions on GitHub PRs and comments - -USAGE: - check-bot-reactions.sh codex-thumbsup <pr_number> [--after <timestamp>] - check-bot-reactions.sh claude-eyes <comment_id> [--retry <attempts>] [--delay <seconds>] - -COMMANDS: - codex-thumbsup Check for Codex +1 reaction on PR body - Returns reaction created_at timestamp if found - --after: Only count reaction if created after this timestamp - - claude-eyes Check for Claude eyes reaction on a specific comment - Retries with delay if not found immediately - --retry: Number of attempts (default: 3) - --delay: Seconds between attempts (default: 5) - -EXIT CODES: - 0 - Reaction found (outputs JSON with reaction info) - 1 - Reaction not found - 2 - Error (API failure, etc.) 
- -EXAMPLES: - # Check if Codex approved PR #123 with thumbs-up - check-bot-reactions.sh codex-thumbsup 123 - - # Check if Codex approved after loop started - check-bot-reactions.sh codex-thumbsup 123 --after "2026-01-18T10:00:00Z" - - # Wait for Claude eyes reaction on comment (15 seconds total) - check-bot-reactions.sh claude-eyes 12345678 --retry 3 --delay 5 -EOF - exit 0 -} - -# ======================================== -# Parse Arguments -# ======================================== - -COMMAND="${1:-}" -shift || true - -if [[ -z "$COMMAND" ]] || [[ "$COMMAND" == "-h" ]] || [[ "$COMMAND" == "--help" ]]; then - show_help -fi - -case "$COMMAND" in - codex-thumbsup) - # Parse codex-thumbsup arguments - PR_NUMBER="" - AFTER_TIMESTAMP="" - - while [[ $# -gt 0 ]]; do - case $1 in - --after) - AFTER_TIMESTAMP="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option for codex-thumbsup: $1" >&2 - exit 2 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Multiple PR numbers specified" >&2 - exit 2 - fi - shift - ;; - esac - done - - if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required for codex-thumbsup" >&2 - exit 2 - fi - - # IMPORTANT: Use the PR's base repository for API calls (for fork PR support) - # Reactions are on the base repo, not the fork - # Strategy: Try current repo first, check if PR exists there, then try parent repo for forks - - # Step 1: Get current repo - CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - - # Step 2: Determine the correct repo for PR operations - # Try current repo first - if PR exists there, use it - PR_BASE_REPO="" - if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi - fi - - # Step 3: If PR not found in current repo, try parent repo (fork case) - if 
[[ -z "$PR_BASE_REPO" ]]; then - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi - fi - - # Step 4: Final fallback to current repo - if [[ -z "$PR_BASE_REPO" ]]; then - PR_BASE_REPO="$CURRENT_REPO" - fi - - # Fetch PR reactions (with pagination to catch all reactions) - # The PR body is treated as issue #PR_NUMBER, so we use the issues reactions endpoint - # IMPORTANT: Use PR_BASE_REPO for fork PR support - # IMPORTANT: Use --paginate to fetch all reactions (default is 30 per page) - # NOTE: --paginate with --jq emits one array per page; use jq -s 'add' to merge them - REACTIONS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/reactions" \ - --paginate --jq '[.[] | {user: .user.login, content: .content, created_at: .created_at}]' 2>/dev/null \ - | jq -s 'add // []') || { - echo "Error: Failed to fetch PR reactions" >&2 - exit 2 - } - - # Look for Codex +1 reaction - # User login: chatgpt-codex-connector[bot] - CODEX_REACTION=$(echo "$REACTIONS" | jq -r ' - [.[] | select(.user == "chatgpt-codex-connector[bot]" and .content == "+1")] | .[0] // empty - ') - - if [[ "$CODEX_REACTION" == "null" ]] || [[ -z "$CODEX_REACTION" ]]; then - # No +1 reaction from Codex - exit 1 - fi - - REACTION_AT=$(echo "$CODEX_REACTION" | jq -r '.created_at') - - # If --after specified, check timestamp - if [[ -n "$AFTER_TIMESTAMP" ]]; then - if [[ "$REACTION_AT" < "$AFTER_TIMESTAMP" ]]; then - # Reaction exists but is older than specified timestamp - exit 1 - fi - fi - - # Output reaction info - echo "$CODEX_REACTION" - exit 0 - ;; - - claude-eyes) - # Parse claude-eyes arguments - COMMENT_ID="" - 
PR_NUMBER="" - MAX_RETRIES="$DEFAULT_MAX_RETRIES" - RETRY_DELAY="$DEFAULT_RETRY_DELAY" - - while [[ $# -gt 0 ]]; do - case $1 in - --retry) - MAX_RETRIES="$2" - shift 2 - ;; - --delay) - RETRY_DELAY="$2" - shift 2 - ;; - --pr) - PR_NUMBER="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option for claude-eyes: $1" >&2 - exit 2 - ;; - *) - if [[ -z "$COMMENT_ID" ]]; then - COMMENT_ID="$1" - else - echo "Error: Multiple comment IDs specified" >&2 - exit 2 - fi - shift - ;; - esac - done - - if [[ -z "$COMMENT_ID" ]]; then - echo "Error: Comment ID is required for claude-eyes" >&2 - exit 2 - fi - - # IMPORTANT: Use the PR's base repository for API calls (for fork PR support) - # Reactions are on the base repo, not the fork - # Strategy: Try current repo first, check if PR exists there, then try parent repo for forks - - # Step 1: Get current repo - CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - - # Step 2: Determine the correct repo for PR operations (if PR number provided) - PR_BASE_REPO="" - if [[ -n "$PR_NUMBER" && -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi - fi - - # Step 3: If PR not found in current repo and PR number provided, try parent repo (fork case) - if [[ -z "$PR_BASE_REPO" && -n "$PR_NUMBER" ]]; then - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi - fi - - # Step 4: Final fallback to current repo - if [[ -z "$PR_BASE_REPO" ]]; then - 
PR_BASE_REPO="$CURRENT_REPO" - fi - - # Retry loop for eyes reaction - for attempt in $(seq 1 "$MAX_RETRIES"); do - # Wait before checking (gives Claude time to react) - sleep "$RETRY_DELAY" - - # Fetch comment reactions (with pagination to catch all reactions) - # IMPORTANT: Use PR_BASE_REPO for fork PR support - # IMPORTANT: Use --paginate to fetch all reactions (default is 30 per page) - # NOTE: --paginate with --jq emits one array per page; use jq -s 'add' to merge them - REACTIONS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/comments/$COMMENT_ID/reactions" \ - --paginate --jq '[.[] | {user: .user.login, content: .content, created_at: .created_at}]' 2>/dev/null \ - | jq -s 'add // []') || { - # API error - continue to next attempt - continue - } - - # Look for Claude eyes reaction - # User login: claude[bot] - CLAUDE_REACTION=$(echo "$REACTIONS" | jq -r ' - [.[] | select(.user == "claude[bot]" and .content == "eyes")] | .[0] // empty - ') - - if [[ "$CLAUDE_REACTION" != "null" ]] && [[ -n "$CLAUDE_REACTION" ]]; then - # Found eyes reaction - echo "$CLAUDE_REACTION" - exit 0 - fi - - # Not found yet, will retry if attempts remain - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Attempt $attempt/$MAX_RETRIES: Eyes not found, retrying..." >&2 - fi - done - - # All attempts exhausted - echo "No eyes reaction found after $MAX_RETRIES attempts ($(( MAX_RETRIES * RETRY_DELAY )) seconds total)" >&2 - exit 1 - ;; - - *) - echo "Error: Unknown command: $COMMAND" >&2 - echo "Use --help for usage information" >&2 - exit 2 - ;; -esac diff --git a/scripts/check-pr-reviewer-status.sh b/scripts/check-pr-reviewer-status.sh deleted file mode 100755 index cb3e31fd..00000000 --- a/scripts/check-pr-reviewer-status.sh +++ /dev/null @@ -1,275 +0,0 @@ -#!/bin/bash -# -# Check PR reviewer status for startup case determination -# -# Analyzes reviewer comments on ENTIRE PR (not just after latest commit) -# to determine which startup case applies. 
-# -# Usage: -# check-pr-reviewer-status.sh <pr_number> --bots <bot1,bot2> -# -# Output (JSON): -# { -# "case": 1-5, -# "reviewers_commented": ["claude"], -# "reviewers_missing": ["codex"], -# "latest_commit_sha": "abc123", -# "latest_commit_at": "2026-01-18T12:00:00Z", -# "newest_review_at": "2026-01-18T11:00:00Z", -# "has_commits_after_reviews": true -# } -# -# Cases: -# 1 - No reviewer comments at all -# 2 - Some (not all) reviewers commented -# 3 - All reviewers commented, no new commits after -# 4 - All reviewers commented, new commits after (needs re-review) -# 5 - All reviewers commented, new commits after (like case 4, for future distinction) - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Timeout for gh operations -GH_TIMEOUT=60 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -BOT_LIST="" - -while [[ $# -gt 0 ]]; do - case $1 in - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - BOT_LIST="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Multiple PR numbers specified" >&2 - exit 1 - fi - shift - ;; - esac -done - -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - echo "Usage: check-pr-reviewer-status.sh <pr_number> --bots <bot1,bot2>" >&2 - exit 1 -fi - -if [[ -z "$BOT_LIST" ]]; then - echo "Error: --bots is required" >&2 - echo "Usage: check-pr-reviewer-status.sh <pr_number> --bots <bot1,bot2>" >&2 - exit 1 -fi - -# ======================================== -# Bot Name Mapping -# ======================================== - -# Map bot names to 
GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# ======================================== -# Fetch PR Data -# ======================================== - -# Parse bot list into array -IFS=',' read -ra BOTS <<< "$BOT_LIST" - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Warning: Could not resolve PR base repository, using current repo" >&2 - PR_BASE_REPO="$CURRENT_REPO" -fi - -# Get latest commit info (use --repo for fork support) 
-COMMIT_INFO=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_BASE_REPO" \ - --json headRefOid,commits \ - --jq '{sha: .headRefOid, date: (.commits | sort_by(.committedDate) | last | .committedDate)}' 2>/dev/null) || { - echo "Error: Failed to fetch PR commit info" >&2 - exit 1 -} - -LATEST_COMMIT_SHA=$(echo "$COMMIT_INFO" | jq -r '.sha') -LATEST_COMMIT_AT=$(echo "$COMMIT_INFO" | jq -r '.date') - -# Fetch all comments (issue comments, review comments, and PR review submissions) -# Using --paginate to handle PRs with many comments -# IMPORTANT: Use PR_BASE_REPO for fork PR support -ISSUE_COMMENTS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .created_at, body: .body}]' 2>/dev/null) || ISSUE_COMMENTS="[]" - -REVIEW_COMMENTS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/pulls/$PR_NUMBER/comments" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .created_at, body: .body}]' 2>/dev/null) || REVIEW_COMMENTS="[]" - -# Also fetch PR review submissions (APPROVE, REQUEST_CHANGES, COMMENT reviews) -# These are different from inline review comments and may be the only feedback from some bots -PR_REVIEWS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/pulls/$PR_NUMBER/reviews" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .submitted_at, body: .body, state: .state}]' 2>/dev/null) || PR_REVIEWS="[]" - -# Combine all comments and reviews -ALL_COMMENTS=$(echo "$ISSUE_COMMENTS $REVIEW_COMMENTS $PR_REVIEWS" | jq -s 'add // []') - -# ======================================== -# Analyze Comments by Bot -# ======================================== - -declare -a REVIEWERS_COMMENTED=() -declare -a REVIEWERS_MISSING=() -declare -a REVIEWERS_STALE=() # Bots whose latest review is before latest commit -NEWEST_REVIEW_AT="" - -for bot in "${BOTS[@]}"; do - author=$(map_bot_to_author "$bot") - - # Check if this bot has 
any comments - BOT_COMMENTS=$(echo "$ALL_COMMENTS" | jq --arg author "$author" '[.[] | select(.author == $author)]') - BOT_COUNT=$(echo "$BOT_COMMENTS" | jq 'length') - - if [[ "$BOT_COUNT" -gt 0 ]]; then - REVIEWERS_COMMENTED+=("$bot") - - # Track this bot's newest review timestamp - BOT_NEWEST=$(echo "$BOT_COMMENTS" | jq -r 'sort_by(.created_at) | reverse | .[0].created_at') - - # Check if this bot's review is stale (before latest commit) - # This is per-bot, not global - a bot's review can be stale even if another bot reviewed later - if [[ -n "$LATEST_COMMIT_AT" && -n "$BOT_NEWEST" && "$LATEST_COMMIT_AT" > "$BOT_NEWEST" ]]; then - REVIEWERS_STALE+=("$bot") - fi - - # Track global newest for output (still useful for debugging) - if [[ -z "$NEWEST_REVIEW_AT" ]] || [[ "$BOT_NEWEST" > "$NEWEST_REVIEW_AT" ]]; then - NEWEST_REVIEW_AT="$BOT_NEWEST" - fi - else - REVIEWERS_MISSING+=("$bot") - fi -done - -# ======================================== -# Determine Case -# ======================================== - -CASE=0 -HAS_COMMITS_AFTER_REVIEWS=false - -# Count how many bots have commented -COMMENTED_COUNT=${#REVIEWERS_COMMENTED[@]} -MISSING_COUNT=${#REVIEWERS_MISSING[@]} -STALE_COUNT=${#REVIEWERS_STALE[@]} -TOTAL_BOTS=${#BOTS[@]} - -if [[ $COMMENTED_COUNT -eq 0 ]]; then - # Case 1: No reviewer comments at all - CASE=1 -elif [[ $MISSING_COUNT -gt 0 ]]; then - # Some (not all) reviewers commented - # Check if ANY bot that commented has a stale review (per-bot check) - if [[ $STALE_COUNT -gt 0 ]]; then - # Case 5: Some reviewers commented, but at least one has stale review - HAS_COMMITS_AFTER_REVIEWS=true - CASE=5 - else - # Case 2: Some reviewers commented, all reviews are fresh - CASE=2 - fi -else - # All reviewers have commented - # Check if ANY bot has a stale review (per-bot check, not global newest) - if [[ $STALE_COUNT -gt 0 ]]; then - # Case 4: All reviewers commented, but at least one has stale review - HAS_COMMITS_AFTER_REVIEWS=true - CASE=4 - else - # Case 3: 
All commented, all reviews are fresh - CASE=3 - fi -fi - -# ======================================== -# Output JSON -# ======================================== - -# Build JSON arrays -COMMENTED_JSON=$(printf '%s\n' "${REVIEWERS_COMMENTED[@]}" | jq -R . | jq -s .) -MISSING_JSON=$(printf '%s\n' "${REVIEWERS_MISSING[@]}" | jq -R . | jq -s .) - -# Handle empty arrays -[[ ${#REVIEWERS_COMMENTED[@]} -eq 0 ]] && COMMENTED_JSON="[]" -[[ ${#REVIEWERS_MISSING[@]} -eq 0 ]] && MISSING_JSON="[]" - -jq -n \ - --argjson case "$CASE" \ - --argjson reviewers_commented "$COMMENTED_JSON" \ - --argjson reviewers_missing "$MISSING_JSON" \ - --arg latest_commit_sha "$LATEST_COMMIT_SHA" \ - --arg latest_commit_at "$LATEST_COMMIT_AT" \ - --arg newest_review_at "${NEWEST_REVIEW_AT:-null}" \ - --argjson has_commits_after_reviews "$HAS_COMMITS_AFTER_REVIEWS" \ - '{ - case: $case, - reviewers_commented: $reviewers_commented, - reviewers_missing: $reviewers_missing, - latest_commit_sha: $latest_commit_sha, - latest_commit_at: $latest_commit_at, - newest_review_at: (if $newest_review_at == "null" then null else $newest_review_at end), - has_commits_after_reviews: $has_commits_after_reviews - }' diff --git a/scripts/fetch-pr-comments.sh b/scripts/fetch-pr-comments.sh deleted file mode 100755 index 2dc996b8..00000000 --- a/scripts/fetch-pr-comments.sh +++ /dev/null @@ -1,452 +0,0 @@ -#!/bin/bash -# -# Fetch PR comments from GitHub -# -# Fetches all types of PR comments: -# - Issue comments (general comments on the PR) -# - Review comments (inline code comments) -# - PR reviews (summary reviews with approval/rejection status) -# -# Usage: -# fetch-pr-comments.sh <pr_number> <output_file> [--after <timestamp>] -# -# Output: Formatted markdown file with all comments -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -OUTPUT_FILE="" -AFTER_TIMESTAMP="" -ACTIVE_BOTS="" # Comma-separated list of active 
bots for grouping - -while [[ $# -gt 0 ]]; do - case $1 in - --after) - if [[ -z "${2:-}" ]]; then - echo "Error: --after requires a timestamp argument" >&2 - exit 1 - fi - AFTER_TIMESTAMP="$2" - shift 2 - ;; - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - ACTIVE_BOTS="$2" - shift 2 - ;; - -h|--help) - cat << 'HELP_EOF' -fetch-pr-comments.sh - Fetch PR comments from GitHub - -USAGE: - fetch-pr-comments.sh <pr_number> <output_file> [OPTIONS] - -ARGUMENTS: - <pr_number> The PR number to fetch comments from - <output_file> Path to write the formatted comments - -OPTIONS: - --after <timestamp> Only include comments after this ISO 8601 timestamp - --bots <bot1,bot2> Comma-separated list of active bots for grouping - -h, --help Show this help message - -OUTPUT FORMAT: - The output file contains markdown-formatted comments with: - - Comment type (issue comment, review comment, PR review) - - Author (with [bot] indicator for bot accounts) - - Timestamp - - Content - - Comments are deduplicated by ID and sorted newest first. - Human comments come before bot comments. - If --bots is provided, bot comments are grouped by bot. -HELP_EOF - exit 0 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - elif [[ -z "$OUTPUT_FILE" ]]; then - OUTPUT_FILE="$1" - else - echo "Error: Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac -done - -# Validate arguments -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - exit 1 -fi - -if [[ -z "$OUTPUT_FILE" ]]; then - echo "Error: Output file is required" >&2 - exit 1 -fi - -if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number: $PR_NUMBER" >&2 - exit 1 -fi - -# ======================================== -# Check Prerequisites -# ======================================== - -if ! 
command -v gh &>/dev/null; then - echo "Error: GitHub CLI (gh) is required" >&2 - exit 1 -fi - -if ! command -v jq &>/dev/null; then - echo "Error: jq is required for JSON parsing" >&2 - exit 1 -fi - -# ======================================== -# Get Repository Info -# ======================================== - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(gh repo view --json owner,name -q '.owner.login + "/" + .name' 2>/dev/null) || { - echo "Error: Failed to get current repository" >&2 - exit 1 -} - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" -else - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(gh repo view --json parent -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Error: Failed to find PR #$PR_NUMBER in current or parent repository" >&2 - exit 1 -fi - -REPO_OWNER="${PR_BASE_REPO%%/*}" -REPO_NAME="${PR_BASE_REPO##*/}" - -if [[ -z "$REPO_OWNER" || -z "$REPO_NAME" ]]; then - echo "Error: Could not parse repository owner/name from: $PR_BASE_REPO" >&2 - exit 1 -fi - -# ======================================== -# Fetch Comments -# ======================================== - -# Create temporary files for each comment 
type -TEMP_DIR=$(mktemp -d) -trap 'rm -rf "$TEMP_DIR"' EXIT - -ISSUE_COMMENTS_FILE="$TEMP_DIR/issue_comments.json" -REVIEW_COMMENTS_FILE="$TEMP_DIR/review_comments.json" -PR_REVIEWS_FILE="$TEMP_DIR/pr_reviews.json" - -# Retry configuration -MAX_RETRIES=3 -RETRY_DELAY=2 - -# Track API failures for strict mode -API_FAILURES=0 - -# Function to fetch with retries -fetch_with_retry() { - local endpoint="$1" - local output_file="$2" - local description="$3" - local attempt=1 - - while [[ $attempt -le $MAX_RETRIES ]]; do - if gh api "$endpoint" --paginate > "$output_file" 2>/dev/null; then - return 0 - fi - - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Warning: Failed to fetch $description (attempt $attempt/$MAX_RETRIES), retrying in ${RETRY_DELAY}s..." >&2 - sleep "$RETRY_DELAY" - else - echo "ERROR: Failed to fetch $description after $MAX_RETRIES attempts" >&2 - echo "[]" > "$output_file" - API_FAILURES=$((API_FAILURES + 1)) - # Return 0 so script continues under set -euo pipefail - # API_FAILURES counter tracks failures for strict mode if needed - return 0 - fi - ((attempt++)) - done -} - -# Fetch issue comments (general PR comments) -# claude[bot] typically posts here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/issues/$PR_NUMBER/comments" "$ISSUE_COMMENTS_FILE" "issue comments" - -# Fetch PR review comments (inline code comments) -# codex (chatgpt-codex-connector[bot]) typically posts inline comments here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/comments" "$REVIEW_COMMENTS_FILE" "PR review comments" - -# Fetch PR reviews (summary reviews with approval status) -# Both bots may post summary reviews here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/reviews" "$PR_REVIEWS_FILE" "PR reviews" - -# ======================================== -# Process and Format Comments -# ======================================== - -# Function to check if user is a bot -is_bot() { - local user_type="$1" - local user_login="$2" - - if [[ 
"$user_type" == "Bot" ]] || [[ "$user_login" == *"[bot]" ]]; then - echo "true" - else - echo "false" - fi -} - -# Function to format timestamp for comparison -format_timestamp() { - local ts="$1" - # Remove trailing Z and convert to comparable format - echo "$ts" | sed 's/Z$//' | tr 'T' ' ' -} - -# Initialize output file -cat > "$OUTPUT_FILE" << EOF -# PR Comments for #$PR_NUMBER - -Fetched at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -Repository: $REPO_OWNER/$REPO_NAME - ---- - -EOF - -# Process all comments into a unified format -# Create a combined JSON with all comments -ALL_COMMENTS_FILE="$TEMP_DIR/all_comments.json" - -# Process issue comments -jq -r --arg type "issue_comment" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - updated_at: .updated_at, - body: .body, - path: null, - line: null, - state: null - } - else - empty - end -' "$ISSUE_COMMENTS_FILE" > "$TEMP_DIR/issue_processed.jsonl" 2>/dev/null || true - -# Process review comments (inline) -jq -r --arg type "review_comment" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - updated_at: .updated_at, - body: .body, - path: .path, - line: (.line // .original_line), - state: null - } - else - empty - end -' "$REVIEW_COMMENTS_FILE" > "$TEMP_DIR/review_processed.jsonl" 2>/dev/null || true - -# Process PR reviews -# Note: Include all reviews, even those with empty body (e.g. 
approval-only reviews) -# For empty body reviews, use a placeholder indicating the state -jq -r --arg type "pr_review" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .submitted_at, - updated_at: .submitted_at, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end), - path: null, - line: null, - state: .state - } - else - empty - end -' "$PR_REVIEWS_FILE" > "$TEMP_DIR/reviews_processed.jsonl" 2>/dev/null || true - -# Combine all processed comments and deduplicate by id -cat "$TEMP_DIR/issue_processed.jsonl" "$TEMP_DIR/review_processed.jsonl" "$TEMP_DIR/reviews_processed.jsonl" 2>/dev/null | \ - jq -s 'unique_by(.id)' > "$ALL_COMMENTS_FILE" - -# Filter by timestamp if provided -if [[ -n "$AFTER_TIMESTAMP" ]]; then - jq --arg after "$AFTER_TIMESTAMP" ' - [.[] | select(.created_at > $after)] - ' "$ALL_COMMENTS_FILE" > "$TEMP_DIR/filtered.json" - mv "$TEMP_DIR/filtered.json" "$ALL_COMMENTS_FILE" -fi - -# Sort: human comments first, then by timestamp (newest first) -# Uses fromdateiso8601 for proper ISO 8601 timestamp parsing -# Filter out entries with null created_at to avoid fromdateiso8601 errors -jq ' - [.[] | select(.created_at != null)] | - sort_by( - (if .author_type == "Bot" or (.author | test("\\[bot\\]$")) then 1 else 0 end), - -(.created_at | fromdateiso8601) - ) -' "$ALL_COMMENTS_FILE" > "$TEMP_DIR/sorted.json" - -# Format comments into markdown -COMMENT_COUNT=$(jq 'length' "$TEMP_DIR/sorted.json") - -if [[ "$COMMENT_COUNT" == "0" ]]; then - cat >> "$OUTPUT_FILE" << EOF -*No comments found.* - ---- - -This PR has no review comments yet from the monitored bots. 
-EOF -else - # Add section headers - echo "## Human Comments" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - # First pass: human comments - HUMAN_COMMENTS=$(jq -r ' - .[] | select(.author_type != "Bot" and (.author | test("\\[bot\\]$") | not)) | - "### Comment from \(.author)\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' "$TEMP_DIR/sorted.json" 2>/dev/null || true) - - if [[ -n "$HUMAN_COMMENTS" ]]; then - echo "$HUMAN_COMMENTS" >> "$OUTPUT_FILE" - else - echo "*No human comments.*" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - fi - - echo "" >> "$OUTPUT_FILE" - - # Second pass: bot comments - if [[ -n "$ACTIVE_BOTS" ]]; then - # Map bot names to GitHub comment author names: - # - claude -> claude[bot] - # - codex -> chatgpt-codex-connector[bot] - map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac - } - - # Group bot comments by active bots - echo "## Bot Comments (Grouped by Bot)" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - IFS=',' read -ra BOT_ARRAY <<< "$ACTIVE_BOTS" - for bot in "${BOT_ARRAY[@]}"; do - bot=$(echo "$bot" | tr -d ' ') - author=$(map_bot_to_author "$bot") - echo "### Comments from ${author}" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - BOT_COMMENTS=$(jq -r --arg author "$author" ' - [.[] | select(.author == $author)] | - if length == 0 then - "*No comments from this bot.*\n" - else - .[] | - "#### Comment\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - end - ' "$TEMP_DIR/sorted.json" 
2>/dev/null || echo "*Error reading comments.*") - - echo "$BOT_COMMENTS" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - done - else - # Default: all bot comments together - echo "## Bot Comments" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - jq -r ' - .[] | select(.author_type == "Bot" or (.author | test("\\[bot\\]$"))) | - "### Comment from \(.author)\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' "$TEMP_DIR/sorted.json" >> "$OUTPUT_FILE" 2>/dev/null || true - fi -fi - -echo "" >> "$OUTPUT_FILE" -echo "---" >> "$OUTPUT_FILE" -echo "" >> "$OUTPUT_FILE" -echo "*End of comments*" >> "$OUTPUT_FILE" - -# Report API failures (non-fatal but logged) -if [[ $API_FAILURES -gt 0 ]]; then - echo "WARNING: $API_FAILURES API endpoint(s) failed after retries. Some comments may be missing." >&2 - echo "" >> "$OUTPUT_FILE" - echo "**Warning:** Some API calls failed. Comments may be incomplete." >> "$OUTPUT_FILE" -fi - -exit 0 diff --git a/scripts/humanize.sh b/scripts/humanize.sh index 1613dd62..9804bde5 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # humanize.sh - Humanize shell utilities # Part of rc.d configuration # Compatible with both bash and zsh @@ -33,6 +33,39 @@ humanize_split_to_array() { fi } +# Parse issue breakdown from goal-tracker.md +# Returns: blocking_issues|queued_issues|open_issues +humanize_parse_goal_tracker_issue_counts() { + local tracker_file="$1" + if [[ ! -f "$tracker_file" ]]; then + echo "0|0|0" + return + fi + + _count_table_data_rows() { + local row_count + row_count=$(sed -n "/$1/,/$2/p" "$tracker_file" | grep -cE '^\|' || true) + row_count=${row_count:-0} + echo $((row_count > 2 ? 
row_count - 2 : 0)) + } + + local blocking_issues + local queued_issues + local open_issues + + blocking_issues=$(_count_table_data_rows '### Blocking Side Issues' '^###') + queued_issues=$(_count_table_data_rows '### Queued Side Issues' '^###') + open_issues=$((blocking_issues + queued_issues)) + + # Legacy schema only had Open Issues; treat them as blocking for safety. + if [[ "$open_issues" -eq 0 ]]; then + open_issues=$(_count_table_data_rows '### Open Issues' '^###') + blocking_issues="$open_issues" + fi + + echo "${blocking_issues}|${queued_issues}|${open_issues}" +} + # Parse goal-tracker.md and return summary values # Returns: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary humanize_parse_goal_tracker() { @@ -105,9 +138,10 @@ humanize_parse_goal_tracker() { local deferred_tasks deferred_tasks=$(_count_table_data_rows '### Explicitly Deferred' '^###') - # Count Open Issues - local open_issues - open_issues=$(_count_table_data_rows '### Open Issues' '^###') + # Count Open Issues (new schema prefers Blocking/Queued Side Issues; old schema used Open Issues) + local -a issue_parts + humanize_split_to_array issue_parts "$(humanize_parse_goal_tracker_issue_counts "$tracker_file")" + local open_issues="${issue_parts[2]}" # Extract Ultimate Goal summary (first content line after heading) local goal_summary @@ -364,8 +398,11 @@ _humanize_monitor_codex() { local review_started=$(grep -E "^review_started:" "$state_file" 2>/dev/null | sed 's/review_started: *//' | tr -d ' ') local agent_teams=$(grep -E "^agent_teams:" "$state_file" 2>/dev/null | sed 's/agent_teams: *//' | tr -d ' ') local push_every_round=$(grep -E "^push_every_round:" "$state_file" 2>/dev/null | sed 's/push_every_round: *//' | tr -d ' ') + local mainline_stall_count=$(grep -E "^mainline_stall_count:" "$state_file" 2>/dev/null | sed 's/mainline_stall_count: *//' | tr -d ' ') + local last_mainline_verdict=$(grep -E "^last_mainline_verdict:" "$state_file" 
2>/dev/null | sed 's/last_mainline_verdict: *//' | tr -d ' ') + local drift_status=$(grep -E "^drift_status:" "$state_file" 2>/dev/null | sed 's/drift_status: *//' | tr -d ' ') - echo "${current_round:-N/A}|${max_iterations:-N/A}|${full_review_round:-N/A}|${codex_model:-N/A}|${codex_effort:-N/A}|${started_at:-N/A}|${plan_file:-N/A}|${ask_codex_question:-false}|${review_started:-false}|${agent_teams:-}|${push_every_round:-}" + echo "${current_round:-N/A}|${max_iterations:-N/A}|${full_review_round:-N/A}|${codex_model:-N/A}|${codex_effort:-N/A}|${started_at:-N/A}|${plan_file:-N/A}|${ask_codex_question:-false}|${review_started:-false}|${agent_teams:-}|${push_every_round:-}|${mainline_stall_count:-0}|${last_mainline_verdict:-unknown}|${drift_status:-normal}" } # Internal wrappers that call top-level functions @@ -405,6 +442,9 @@ _humanize_monitor_codex() { local review_started="${state_parts[8]:-false}" local agent_teams="${state_parts[9]:-}" local push_every_round="${state_parts[10]:-}" + local mainline_stall_count="${state_parts[11]:-0}" + local last_mainline_verdict="${state_parts[12]:-unknown}" + local drift_status="${state_parts[13]:-normal}" # Parse goal-tracker.md local -a goal_parts @@ -416,6 +456,10 @@ _humanize_monitor_codex() { local deferred_tasks="${goal_parts[4]}" local open_issues="${goal_parts[5]}" local goal_summary="${goal_parts[6]}" + local -a issue_parts + _split_to_array issue_parts "$(humanize_parse_goal_tracker_issue_counts "$goal_tracker_file")" + local blocking_issues="${issue_parts[0]}" + local queued_issues="${issue_parts[1]}" # Parse git status local -a git_parts @@ -548,18 +592,35 @@ _humanize_monitor_codex() { fi team_mode_segment=" | Team Mode: ${team_color}${team_display}${reset}" fi - printf "${magenta}Status:${reset} ${status_line} | Codex Ask Question: ${ask_q_color}${ask_q_display}${reset}${team_mode_segment}${clr_eol}\n" + local drift_segment="" + local drift_color="${dim}" + if [[ "$drift_status" == "replan_required" ]]; then + 
drift_color="${red}" + elif [[ "${mainline_stall_count:-0}" -gt 0 ]]; then + drift_color="${yellow}" + fi + if [[ -n "$drift_status" ]]; then + drift_segment=" | Drift: ${drift_color}${drift_status}${reset} (${mainline_stall_count}, ${last_mainline_verdict})" + fi + printf "${magenta}Status:${reset} ${status_line} | Codex Ask Question: ${ask_q_color}${ask_q_display}${reset}${team_mode_segment}${drift_segment}${clr_eol}\n" # Progress line (color based on completion status) local ac_color="${green}" [[ "$completed_acs" -lt "$total_acs" ]] && ac_color="${yellow}" - local issue_color="${dim}" - [[ "$open_issues" -gt 0 ]] && issue_color="${red}" + local issue_total_color="${dim}" + [[ "$queued_issues" -gt 0 ]] && issue_total_color="${yellow}" + [[ "$blocking_issues" -gt 0 ]] && issue_total_color="${red}" # Use magenta for Progress and Git labels (status/data lines) printf "${magenta}Progress:${reset} ${ac_color}ACs: ${completed_acs}/${total_acs}${reset} Tasks: ${active_tasks} active, ${completed_tasks} done" [[ "$deferred_tasks" -gt 0 ]] && printf " ${yellow}${deferred_tasks} deferred${reset}" - [[ "$open_issues" -gt 0 ]] && printf " ${issue_color}Issues: ${open_issues}${reset}" + if [[ "$open_issues" -gt 0 ]]; then + printf " ${issue_total_color}Issues: ${open_issues}${reset}" + [[ "$blocking_issues" -gt 0 ]] && printf " (${red}%s blocking${reset}" "$blocking_issues" + [[ "$queued_issues" -gt 0 ]] && printf "%s${yellow}%s queued${reset}" \ + "$([[ "$blocking_issues" -gt 0 ]] && echo ", " || echo "(")" "$queued_issues" + printf ")" + fi printf "${clr_eol}\n" # Git status line (same color as Progress) @@ -821,6 +882,10 @@ _humanize_monitor_codex() { # Handle case when no log file exists for current session if [[ -z "$current_file" ]]; then + # Track terminal dimensions to detect resize (fallback for SIGWINCH) + local centered_last_cols=$(tput cols) + local centered_last_rows=$(tput lines) + # Render centered no-log message if status changed or not yet shown if [[ 
"$last_no_log_status" != "$current_loop_status" ]]; then if [[ "$current_loop_status" == "active" ]]; then @@ -842,11 +907,17 @@ _humanize_monitor_codex() { return 0 fi - # Handle terminal resize at a safe point + # Detect terminal resize via both SIGWINCH flag and actual dimension change local redraw_centered_msg=false - if [[ "$resize_needed" == "true" ]]; then + local cur_cols=$(tput cols) + local cur_rows=$(tput lines) + if [[ "$resize_needed" == "true" ]] || \ + [[ "$cur_cols" != "$centered_last_cols" ]] || \ + [[ "$cur_rows" != "$centered_last_rows" ]]; then resize_needed=false redraw_centered_msg=true + centered_last_cols="$cur_cols" + centered_last_rows="$cur_rows" # Check if terminal is too small if ! _check_terminal_size; then _display_terminal_too_small @@ -858,6 +929,8 @@ _humanize_monitor_codex() { [[ "$monitor_running" != "true" ]] && break # Terminal is now big enough, reinitialize _setup_terminal + centered_last_cols=$(tput cols) + centered_last_rows=$(tput lines) else _update_scroll_region fi @@ -942,6 +1015,10 @@ _humanize_monitor_codex() { local log_lines=$(_get_log_area_height) tail -n "$log_lines" "$current_file" 2>/dev/null + # Track terminal dimensions to detect resize (fallback for SIGWINCH) + local follow_last_cols=$(tput cols) + local follow_last_rows=$(tput lines) + # Incremental monitoring loop while [[ "$monitor_running" == "true" ]]; do sleep 0.5 # Check more frequently for smoother output @@ -953,9 +1030,15 @@ _humanize_monitor_codex() { return 0 fi - # Handle terminal resize at a safe point - if [[ "$resize_needed" == "true" ]]; then + # Detect terminal resize via both SIGWINCH flag and actual dimension change + local cur_cols=$(tput cols) + local cur_rows=$(tput lines) + if [[ "$resize_needed" == "true" ]] || \ + [[ "$cur_cols" != "$follow_last_cols" ]] || \ + [[ "$cur_rows" != "$follow_last_rows" ]]; then resize_needed=false + follow_last_cols="$cur_cols" + follow_last_rows="$cur_rows" # Check if terminal is too small if ! 
_check_terminal_size; then _display_terminal_too_small @@ -967,6 +1050,8 @@ _humanize_monitor_codex() { [[ "$monitor_running" != "true" ]] && break # Terminal is now big enough, reinitialize _setup_terminal + follow_last_cols=$(tput cols) + follow_last_rows=$(tput lines) else _update_scroll_region fi @@ -1115,19 +1200,23 @@ humanize() { rlcr) _humanize_monitor_codex "$@" ;; - pr) - _humanize_monitor_pr "$@" - ;; skill) _humanize_monitor_skill "$@" ;; + codex) + _humanize_monitor_skill --tool-filter codex "$@" + ;; + gemini) + _humanize_monitor_skill --tool-filter gemini "$@" + ;; *) - echo "Usage: humanize monitor <rlcr|pr|skill>" + echo "Usage: humanize monitor <rlcr|skill|codex|gemini>" echo "" echo "Subcommands:" echo " rlcr Monitor the latest RLCR loop log from .humanize/rlcr" - echo " pr Monitor the latest PR loop from .humanize/pr-loop" - echo " skill Monitor ask-codex skill invocations from .humanize/skill" + echo " skill Monitor all skill invocations (codex + gemini)" + echo " codex Monitor ask-codex skill invocations only" + echo " gemini Monitor ask-gemini skill invocations only" echo "" echo "Features:" echo " - Fixed status bar showing session info, round progress, model config" @@ -1143,449 +1232,14 @@ humanize() { echo "" echo "Commands:" echo " monitor rlcr Monitor the latest RLCR loop log" - echo " monitor pr Monitor the latest PR loop" - echo " monitor skill Monitor ask-codex skill invocations" + echo " monitor skill Monitor all skill invocations (codex + gemini)" + echo " monitor codex Monitor ask-codex skill invocations only" + echo " monitor gemini Monitor ask-gemini skill invocations only" return 1 ;; esac } -# ======================================== -# PR Loop Monitor Function -# ======================================== - -# Monitor the latest PR loop from .humanize/pr-loop with fixed status bar and rolling tail -_humanize_monitor_pr() { - # Enable 0-indexed arrays in zsh for bash compatibility - [[ -n "${ZSH_VERSION:-}" ]] && setopt 
localoptions ksharrays - - local loop_dir=".humanize/pr-loop" - local current_file="" - local current_session_dir="" - local check_interval=2 # seconds between checking for new files - local status_bar_height=10 # number of lines for status bar - local once_mode=false - - # Parse arguments - while [[ $# -gt 0 ]]; do - case "$1" in - --once) - once_mode=true - shift - ;; - *) - shift - ;; - esac - done - - # Check if .humanize/pr-loop exists - if [[ ! -d "$loop_dir" ]]; then - echo "Error: $loop_dir directory not found in current directory" - echo "Are you in a project with an active PR loop?" - return 1 - fi - - # Use shared monitor helper for finding latest session - _pr_find_latest_session() { - monitor_find_latest_session "$loop_dir" - } - - # Function to find the latest monitorable file (pr-check, pr-feedback, or pr-comment) - _pr_find_latest_file() { - local session_dir="$1" - [[ ! -d "$session_dir" ]] && return - - local latest="" - local latest_mtime=0 - - # Check for pr-check files (Codex analysis output) - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! -f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-check.md' -type f 2>/dev/null) - - # Check for pr-feedback files - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! -f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-feedback.md' -type f 2>/dev/null) - - # Check for pr-comment files - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! 
-f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-comment.md' -type f 2>/dev/null) - - echo "$latest" - } - - # Use shared monitor helper for finding state file - # Note: monitor_find_state_file returns "approve" not "approved" for approve-state.md - # so we maintain the PR-specific status mapping here for display purposes - _pr_find_state_file() { - local session_dir="$1" - local result - result=$(monitor_find_state_file "$session_dir") - local state_file="${result%|*}" - local stop_reason="${result#*|}" - - # Map stop reasons to PR-friendly status names - case "$stop_reason" in - approve) stop_reason="approved" ;; - maxiter) stop_reason="max-iterations" ;; - esac - - echo "$state_file|$stop_reason" - } - - # Function to parse state.md and return key values - _pr_parse_state_md() { - local state_file="$1" - [[ ! 
-f "$state_file" ]] && echo "0|42|?|?|?|?|N/A" && return - - local frontmatter - frontmatter=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$state_file" 2>/dev/null || echo "") - - local current_round=$(echo "$frontmatter" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ') - local max_iterations=$(echo "$frontmatter" | grep "^max_iterations:" | sed "s/max_iterations: *//" | tr -d ' ') - local pr_number=$(echo "$frontmatter" | grep "^pr_number:" | sed "s/pr_number: *//" | tr -d ' ') - local start_branch=$(echo "$frontmatter" | grep "^start_branch:" | sed "s/start_branch: *//" | tr -d '"' || true) - local configured_bots=$(echo "$frontmatter" | sed -n '/^configured_bots:$/,/^[a-z_]*:/{ /^ - /{ s/^ - //; p; } }' | tr '\n' ',' | sed 's/,$//') - local active_bots=$(echo "$frontmatter" | sed -n '/^active_bots:$/,/^[a-z_]*:/{ /^ - /{ s/^ - //; p; } }' | tr '\n' ',' | sed 's/,$//') - local codex_model=$(echo "$frontmatter" | grep "^codex_model:" | sed "s/codex_model: *//" | tr -d ' ') - local codex_effort=$(echo "$frontmatter" | grep "^codex_effort:" | sed "s/codex_effort: *//" | tr -d ' ') - local started_at=$(echo "$frontmatter" | grep "^started_at:" | sed "s/started_at: *//" || true) - - # Apply defaults - current_round=${current_round:-0} - max_iterations=${max_iterations:-42} - pr_number=${pr_number:-"?"} - start_branch=${start_branch:-"?"} - configured_bots=${configured_bots:-"none"} - active_bots=${active_bots:-"none"} - codex_model=${codex_model:-"$DEFAULT_CODEX_MODEL"} - codex_effort=${codex_effort:-"medium"} - started_at=${started_at:-"N/A"} - - echo "$current_round|$max_iterations|$pr_number|$start_branch|$configured_bots|$active_bots|$codex_model|$codex_effort|$started_at" - } - - # Draw the status bar at the top - _pr_draw_status_bar() { - local session_dir="$1" - local monitored_file="$2" - local loop_status="$3" - local term_width=$(tput cols) - - # Parse state file - local state_info=$(_pr_find_state_file "$session_dir") - local 
state_file="${state_info%|*}" - [[ -z "$loop_status" ]] && loop_status="${state_info#*|}" - - local state_values=$(_pr_parse_state_md "$state_file") - IFS='|' read -r current_round max_iterations pr_number start_branch configured_bots active_bots codex_model codex_effort started_at <<< "$state_values" - - # Save cursor position and move to top - tput sc - - # ANSI color codes - local green="\033[1;32m" yellow="\033[1;33m" cyan="\033[1;36m" - local magenta="\033[1;35m" red="\033[1;31m" reset="\033[0m" - local bg="\033[44m" bold="\033[1m" dim="\033[2m" - local clr_eol="\033[K" # Clear to end of line (reduces flicker vs clearing entire area) - - # Move to top and draw directly (no pre-clearing to avoid flicker) - tput cup 0 0 - local session_basename=$(basename "$session_dir") - printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" " PR Loop Monitor" - printf "${cyan}Session:${reset} %s ${cyan}PR:${reset} #%s ${cyan}Branch:${reset} %s${clr_eol}\n" "$session_basename" "$pr_number" "$start_branch" - printf "${green}Round:${reset} ${bold}%s${reset} / %s ${yellow}Codex:${reset} %s (%s)${clr_eol}\n" "$current_round" "$max_iterations" "$codex_model" "$codex_effort" - - # Detect phase and determine status color - local phase="" - local phase_display="" - if type get_pr_loop_phase &>/dev/null; then - phase=$(get_pr_loop_phase "$session_dir") - phase_display=$(get_pr_loop_phase_display "$phase" "$active_bots") - fi - - # Loop status line with color based on phase/status - local status_color="${green}" - case "$phase" in - approved) status_color="${cyan}" ;; - cancelled) status_color="${yellow}" ;; - maxiter) status_color="${red}" ;; - codex_analyzing) status_color="${magenta}" ;; - waiting_initial_review) status_color="${yellow}" ;; - waiting_reviewer) status_color="${green}" ;; - *) status_color="${dim}" ;; - esac - - if [[ -n "$phase_display" ]]; then - printf "${magenta}Phase:${reset} ${status_color}%s${reset}${clr_eol}\n" "$phase_display" - else - # Fallback to 
loop_status if phase detection not available - case "$loop_status" in - active) status_color="${green}" ;; - approved|completed) status_color="${cyan}" ;; - cancelled) status_color="${yellow}" ;; - max-iterations) status_color="${red}" ;; - *) status_color="${dim}" ;; - esac - printf "${magenta}Status:${reset} ${status_color}%s${reset}${clr_eol}\n" "$loop_status" - fi - - # Bot status - printf "${cyan}Configured Bots:${reset} %s${clr_eol}\n" "$configured_bots" - if [[ "$active_bots" == "none" ]] || [[ -z "$active_bots" ]]; then - printf "${green}Active Bots:${reset} ${green}all approved${reset}${clr_eol}\n" - else - printf "${yellow}Active Bots:${reset} %s${clr_eol}\n" "$active_bots" - fi - - # Goal tracker issue stats - local goal_tracker_file="$session_dir/goal-tracker.md" - if [[ -f "$goal_tracker_file" ]] && type humanize_parse_pr_goal_tracker &>/dev/null; then - local tracker_stats=$(humanize_parse_pr_goal_tracker "$goal_tracker_file") - local total_issues resolved_issues remaining_issues last_reviewer - IFS='|' read -r total_issues resolved_issues remaining_issues last_reviewer <<< "$tracker_stats" - if [[ "$total_issues" != "0" ]] || [[ "$resolved_issues" != "0" ]]; then - printf "${cyan}Issues:${reset} Found: ${yellow}%s${reset}, Resolved: ${green}%s${reset}, Remaining: ${red}%s${reset}${clr_eol}\n" "$total_issues" "$resolved_issues" "$remaining_issues" - fi - fi - - # Started time - local start_display="$started_at" - if [[ "$started_at" != "N/A" ]]; then - start_display=$(echo "$started_at" | sed 's/T/ /; s/Z/ UTC/') - fi - printf "${dim}Started:${reset} %s${clr_eol}\n" "$start_display" - - # Currently monitoring - local file_basename="" - [[ -n "$monitored_file" ]] && file_basename=$(basename "$monitored_file") - printf "${dim}Watching:${reset} %s${clr_eol}\n" "${file_basename:-none}" - - # Separator - printf "%-${term_width}s${clr_eol}\n" "$(printf '%*s' "$term_width" | tr ' ' '-')" - - # Restore cursor position - tput rc - } - - # Track state for 
cleanup - local TAIL_PID="" - local monitor_running=true - local cleanup_done=false - - # Cleanup function - called by trap - # Must work cleanly in both bash and zsh - _pr_cleanup() { - # Prevent multiple cleanup calls - [[ "${cleanup_done:-false}" == "true" ]] && return - cleanup_done=true - monitor_running=false - - # Reset traps to prevent re-triggering - trap - INT TERM EXIT 2>/dev/null || true - - # Kill background tail if running - if [[ -n "${TAIL_PID:-}" ]]; then - if kill -0 "$TAIL_PID" 2>/dev/null; then - kill "$TAIL_PID" 2>/dev/null || true - # Use timeout-safe wait - ( wait "$TAIL_PID" 2>/dev/null ) & - wait $! 2>/dev/null || true - fi - fi - - # Show cursor and restore terminal - tput cnorm 2>/dev/null || true - tput rmcup 2>/dev/null || true - echo "" - echo "Monitor stopped." - } - - # Set up signal handlers (bash/zsh compatible) - # Use TRAPINT/TRAPTERM for zsh, standard trap for bash - if [[ -n "${ZSH_VERSION:-}" ]]; then - # zsh: use TRAPINT and TRAPTERM for better handling - TRAPINT() { _pr_cleanup; return 130; } - TRAPTERM() { _pr_cleanup; return 143; } - # Also set EXIT trap for clean exit - trap '_pr_cleanup' EXIT - else - # bash: use standard trap - trap '_pr_cleanup' EXIT INT TERM - fi - - # One-shot mode: print status once and exit (for testing and scripting) - if [[ "$once_mode" == "true" ]]; then - local session_dir=$(_pr_find_latest_session) - if [[ -z "$session_dir" ]]; then - echo "No PR loop sessions found in $loop_dir" - return 1 - fi - - local state_info=$(_pr_find_state_file "$session_dir") - local state_file="${state_info%|*}" - local loop_status="${state_info#*|}" - - if [[ -z "$state_file" ]]; then - echo "No state file found in $session_dir" - return 1 - fi - - local state_values=$(_pr_parse_state_md "$state_file") - IFS='|' read -r current_round max_iterations pr_number start_branch configured_bots active_bots codex_model codex_effort started_at <<< "$state_values" - - # Get phase for --once mode display - local phase="" - 
local phase_display="" - if declare -f get_pr_loop_phase &>/dev/null; then - phase=$(get_pr_loop_phase "$session_dir") - phase_display=$(get_pr_loop_phase_display "$phase" "$active_bots") - fi - - echo "==========================================" - echo " PR Loop Monitor" - echo "==========================================" - echo "" - echo "Session: $(basename "$session_dir")" - if [[ -n "$phase_display" ]]; then - echo "Phase: $phase_display" - else - echo "Status: $loop_status" - fi - echo "" - echo "PR Number: #$pr_number" - echo "Branch: $start_branch" - echo "Configured Bots: ${configured_bots:-none}" - echo "Active Bots: ${active_bots:-none}" - echo "" - echo "Round: $current_round / $max_iterations" - echo "Codex: $codex_model:$codex_effort" - echo "Started: $started_at" - echo "" - echo "==========================================" - echo " Recent Files" - echo "==========================================" - echo "" - - # List recent round files - local round_files - round_files=$(find "$session_dir" -maxdepth 1 -name 'round-*.md' -type f 2>/dev/null) - if [[ -n "$round_files" ]]; then - echo "$round_files" | xargs ls -lt 2>/dev/null | head -10 | while read -r line; do - echo " $line" - done - fi - - echo "" - echo "==========================================" - echo " Latest Activity" - echo "==========================================" - echo "" - - local latest_file=$(_pr_find_latest_file "$session_dir") - if [[ -n "$latest_file" && -f "$latest_file" ]]; then - echo "Latest: $(basename "$latest_file")" - echo "----------------------------------------" - tail -20 "$latest_file" - echo "" - fi - - echo "==========================================" - return 0 - fi - - # Initialize terminal - tput smcup # Save screen - tput civis # Hide cursor - clear - - # Create scrolling region below status bar - tput csr $status_bar_height $(($(tput lines) - 1)) - - # Main monitoring loop - while [[ "$monitor_running" == "true" ]]; do - # Find latest session - local 
session_dir=$(_pr_find_latest_session) - if [[ -z "$session_dir" ]]; then - tput cup $status_bar_height 0 - echo "Waiting for PR loop session..." - sleep "$check_interval" - continue - fi - - # Check if session changed - if [[ "$session_dir" != "$current_session_dir" ]]; then - current_session_dir="$session_dir" - current_file="" - [[ -n "$TAIL_PID" ]] && kill "$TAIL_PID" 2>/dev/null - TAIL_PID="" - fi - - # Find latest file to monitor - local latest_file=$(_pr_find_latest_file "$session_dir") - - # Get loop status - local state_info=$(_pr_find_state_file "$session_dir") - local loop_status="${state_info#*|}" - - # Update status bar - _pr_draw_status_bar "$session_dir" "$latest_file" "$loop_status" - - # Check if file changed or new file appeared - if [[ "$latest_file" != "$current_file" ]] && [[ -n "$latest_file" ]]; then - current_file="$latest_file" - - # Kill old tail process - [[ -n "$TAIL_PID" ]] && kill "$TAIL_PID" 2>/dev/null - - # Clear content area and show new file - tput cup $status_bar_height 0 - tput ed # Clear to end of screen - - # Start tailing the new file - tail -n +1 -f "$current_file" 2>/dev/null & - TAIL_PID=$! - fi - - # If no file to monitor yet, show waiting message - if [[ -z "$current_file" ]]; then - tput cup $status_bar_height 0 - echo "Waiting for PR loop activity..." 
#!/usr/bin/env bash
#
# Install/update Humanize native Codex hooks in CODEX_HOME/hooks.json.
#
# The script merges the managed Stop hooks from config/codex-hooks.json into
# the user's hooks.json (replacing any previously installed Humanize Stop
# hooks, keeping everything else) and then enables the codex_hooks feature.
#

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
CODEX_CONFIG_DIR="${CODEX_HOME:-${HOME}/.codex}"
# Left empty on purpose: the default depends on the final value of
# CODEX_CONFIG_DIR, which --codex-config-dir may still change. It is resolved
# after option parsing.
RUNTIME_ROOT=""
DRY_RUN="false"
ENABLE_FEATURE="true"
HOOKS_TEMPLATE="$REPO_ROOT/config/codex-hooks.json"

# Print command-line help.
usage() {
    cat <<'EOF'
Install/update Humanize native Codex hooks.

Usage:
  scripts/install-codex-hooks.sh [options]

Options:
  --codex-config-dir PATH  Codex config dir (default: ${CODEX_HOME:-~/.codex})
  --runtime-root PATH      Installed Humanize runtime root (default: <codex-config-dir>/skills/humanize)
  --skip-enable-feature    Do not run `codex features enable codex_hooks`
  --dry-run                Print actions without writing
  -h, --help               Show help
EOF
}

# Print an informational message with the script prefix.
log() {
    printf '[install-codex-hooks] %s\n' "$*"
}

# Print an error message to stderr and abort with status 1.
die() {
    printf '[install-codex-hooks] Error: %s\n' "$*" >&2
    exit 1
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --codex-config-dir)
            [[ -n "${2:-}" ]] || die "--codex-config-dir requires a value"
            CODEX_CONFIG_DIR="$2"
            shift 2
            ;;
        --runtime-root)
            [[ -n "${2:-}" ]] || die "--runtime-root requires a value"
            RUNTIME_ROOT="$2"
            shift 2
            ;;
        --skip-enable-feature)
            ENABLE_FEATURE="false"
            shift
            ;;
        --dry-run)
            DRY_RUN="true"
            shift
            ;;
        -h|--help)
            usage
            exit 0
            ;;
        *)
            die "unknown option: $1"
            ;;
    esac
done

# Resolve the runtime root only now, so that the documented default
# (<codex-config-dir>/skills/humanize) follows --codex-config-dir.
# --runtime-root rejects empty values, so an empty string means "not given".
if [[ -z "$RUNTIME_ROOT" ]]; then
    RUNTIME_ROOT="$CODEX_CONFIG_DIR/skills/humanize"
fi

[[ -f "$HOOKS_TEMPLATE" ]] || die "hook template not found: $HOOKS_TEMPLATE"

HOOKS_FILE="$CODEX_CONFIG_DIR/hooks.json"

# Abort unless a codex CLI is on PATH and lists the codex_hooks feature.
require_codex_hooks_support() {
    local feature_list

    if ! command -v codex >/dev/null 2>&1; then
        die "Codex CLI with native hooks support is required. Install Codex 0.114.0+ first."
    fi

    # Capture the output before matching. Piping straight into `grep -q`
    # under `set -o pipefail` can fail spuriously: grep exits on the first
    # match and the producer may then die from SIGPIPE.
    feature_list="$(codex features list 2>/dev/null)" || feature_list=""

    if ! grep -qE '^codex_hooks[[:space:]]' <<<"$feature_list"; then
        die "Installed Codex CLI does not expose the codex_hooks feature. Humanize Codex install requires Codex 0.114.0+."
    fi
}

# Merge the managed Stop hooks from the template into the hooks file.
#
# Arguments:
#   $1  path of the hooks.json to create or update
#   $2  path of the hook template (contains {{HUMANIZE_RUNTIME_ROOT}})
#   $3  runtime root substituted into the template
#
# Existing Stop hooks whose command points at a Humanize-managed stop hook
# script are dropped and replaced by the template's Stop groups; all other
# content of the file is preserved.
merge_hooks_json() {
    local hooks_file="$1"
    local template_file="$2"
    local runtime_root="$3"

    if ! command -v python3 >/dev/null 2>&1; then
        die "python3 is required to merge Codex hooks"
    fi

    python3 - "$hooks_file" "$template_file" "$runtime_root" <<'PY'
import json
import pathlib
import re
import shlex
import sys

hooks_file = pathlib.Path(sys.argv[1])
template_file = pathlib.Path(sys.argv[2])
runtime_root = sys.argv[3]

template_text = template_file.read_text(encoding="utf-8")
# JSON-escape the runtime root so metacharacters (quotes, backslashes) do not
# corrupt the template before json.loads parses it.
escaped_root = json.dumps(runtime_root)[1:-1]  # strip outer quotes from dumps output
template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", escaped_root)
template = json.loads(template_text)

# Shell-quote command paths so spaces in runtime_root do not split the command
for group_list in template.get("hooks", {}).values():
    for group in group_list:
        if isinstance(group, dict):
            for hook in group.get("hooks", []):
                if isinstance(hook, dict) and "command" in hook:
                    hook["command"] = shlex.quote(hook["command"])

existing = {}
if hooks_file.exists():
    try:
        with hooks_file.open("r", encoding="utf-8") as fh:
            existing = json.load(fh)
    except json.JSONDecodeError as exc:
        # Report a readable error instead of a traceback; never overwrite a
        # file we could not parse.
        raise SystemExit(f"existing hooks config is not valid JSON: {hooks_file}: {exc}")

if not isinstance(existing, dict):
    raise SystemExit(f"existing hooks config must be a JSON object: {hooks_file}")

hooks = existing.setdefault("hooks", {})
if not isinstance(hooks, dict):
    raise SystemExit(f"existing hooks config has invalid 'hooks' object: {hooks_file}")

stop_groups = hooks.get("Stop", [])
if stop_groups is None:
    stop_groups = []
if not isinstance(stop_groups, list):
    raise SystemExit(f"existing hooks config has invalid Stop array: {hooks_file}")

# Matches commands that reference a Humanize-managed stop hook script,
# whether the path is bare, quoted, or followed by arguments.
managed_pattern = re.compile(r"(^|/)hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)(['\"\s]|$)")

filtered_groups = []
for group in stop_groups:
    if not isinstance(group, dict):
        filtered_groups.append(group)
        continue
    group_hooks = group.get("hooks")
    if not isinstance(group_hooks, list):
        filtered_groups.append(group)
        continue
    kept_hooks = []
    for hook in group_hooks:
        if not isinstance(hook, dict):
            kept_hooks.append(hook)
            continue
        command = hook.get("command")
        if isinstance(command, str) and managed_pattern.search(command):
            continue
        kept_hooks.append(hook)
    # Groups that only contained managed hooks are dropped entirely.
    if kept_hooks:
        new_group = dict(group)
        new_group["hooks"] = kept_hooks
        filtered_groups.append(new_group)

managed_stop_groups = template.get("hooks", {}).get("Stop", [])
filtered_groups.extend(managed_stop_groups)
hooks["Stop"] = filtered_groups

if not existing.get("description"):
    existing["description"] = template.get("description", "Humanize Codex Hooks")

hooks_file.parent.mkdir(parents=True, exist_ok=True)
hooks_file.write_text(json.dumps(existing, indent=2) + "\n", encoding="utf-8")
PY
}

# Enable the codex_hooks feature for the given config dir unless
# --skip-enable-feature was passed.
enable_feature() {
    local config_dir="$1"

    [[ "$ENABLE_FEATURE" == "true" ]] || return 0

    if CODEX_HOME="$config_dir" codex features enable codex_hooks >/dev/null 2>&1; then
        log "enabled codex_hooks feature in $config_dir/config.toml"
    else
        die "failed to enable codex_hooks feature automatically in $config_dir/config.toml"
    fi
}

log "codex config dir: $CODEX_CONFIG_DIR"
log "runtime root: $RUNTIME_ROOT"
log "hooks file: $HOOKS_FILE"

require_codex_hooks_support

if [[ "$DRY_RUN" == "true" ]]; then
    log "DRY-RUN merge $HOOKS_TEMPLATE -> $HOOKS_FILE"
    if [[ "$ENABLE_FEATURE" == "true" ]]; then
        log "DRY-RUN enable codex_hooks feature in $CODEX_CONFIG_DIR/config.toml"
    fi
    exit 0
fi

merge_hooks_json "$HOOKS_FILE" "$HOOKS_TEMPLATE" "$RUNTIME_ROOT"
enable_feature "$CODEX_CONFIG_DIR"

cat <<EOF

Codex hooks installed.
  hooks.json: $HOOKS_FILE
  runtime root: $RUNTIME_ROOT
EOF
# Locate the skills and runtime sources relative to a candidate root and
# publish them through SKILLS_SOURCE_ROOT / RUNTIME_SOURCE_ROOT.
#
# Two layouts are recognised:
#   1. Source checkout:   <repo>/skills/<skill>/SKILL.md next to <repo>/scripts
#   2. Installed runtime: <skills-dir>/humanize/{scripts,hooks,prompt-template}
#                         with the skill directories as siblings of humanize/
#
# Arguments:
#   $1  directory to inspect
# Aborts via die when neither layout matches.
resolve_source_layout() {
    local candidate_root="$1"
    local runtime_root="$candidate_root"
    local skills_root
    local required_skill
    local all_skills_present="true"

    # Layout 1: a repository checkout.
    if [[ -d "$candidate_root/skills" && -d "$candidate_root/scripts" ]]; then
        SKILLS_SOURCE_ROOT="$candidate_root/skills"
        RUNTIME_SOURCE_ROOT="$candidate_root"
        return 0
    fi

    # Layout 2: an installed runtime bundle whose parent holds the skills.
    if [[ -d "$runtime_root/scripts" && -d "$runtime_root/hooks" && -d "$runtime_root/prompt-template" ]]; then
        skills_root="$(cd "$runtime_root/.." && pwd)"
        for required_skill in humanize humanize-gen-plan humanize-refine-plan humanize-rlcr; do
            if [[ ! -f "$skills_root/$required_skill/SKILL.md" ]]; then
                all_skills_present="false"
                break
            fi
        done
        if [[ "$all_skills_present" == "true" ]]; then
            SKILLS_SOURCE_ROOT="$skills_root"
            RUNTIME_SOURCE_ROOT="$runtime_root"
            return 0
        fi
    fi

    die "could not resolve Humanize source layout from: $candidate_root"
}
# Run the native Codex hooks installer against the Codex config dir.
#
# Arguments:
#   $1  Codex skills dir; the runtime root is <skills-dir>/humanize
# Honors DRY_RUN by forwarding --dry-run to the installer.
install_codex_native_hooks() {
    local target_dir="$1"
    local runtime_root="$target_dir/humanize"
    local hooks_installer="$REPO_ROOT/scripts/install-codex-hooks.sh"
    local args=(
        --codex-config-dir "$CODEX_CONFIG_DIR"
        --runtime-root "$runtime_root"
    )

    [[ -x "$hooks_installer" ]] || die "missing Codex hooks installer: $hooks_installer"
    [[ "$DRY_RUN" == "true" ]] && args+=(--dry-run)

    log "installing native Codex hooks into: $CODEX_CONFIG_DIR"
    "$hooks_installer" "${args[@]}"
}

# Seed the Humanize user config so BitLesson has a model configured.
#
# Arguments:
#   $1  installed runtime root (provides config/default_config.json)
#   $2  install target; "codex" additionally sets provider_mode=codex-only
#       when the user has not chosen one
#
# Existing keys in the user config are never overwritten. A malformed user
# config is reported and left untouched.
install_codex_user_config() {
    local runtime_root="$1"
    local install_target="$2"
    local user_config_dir="${HUMANIZE_USER_CONFIG_DIR}"
    local user_config_file="$user_config_dir/config.json"
    local default_config_file="$runtime_root/config/default_config.json"
    local py_status=0

    if ! command -v python3 >/dev/null 2>&1; then
        die "python3 is required to update Humanize user config for Codex installs"
    fi

    # Dry-run returns before the default-config check: the runtime bundle is
    # presumably not written during a dry run (verify against sync_dir), so
    # on a fresh machine the file cannot exist yet and must not be required.
    if [[ "$DRY_RUN" == "true" ]]; then
        log "DRY-RUN seed Codex-friendly BitLesson config in $user_config_file"
        return
    fi

    [[ -f "$default_config_file" ]] || die "missing default config: $default_config_file"

    mkdir -p "$user_config_dir"

    # `|| py_status=$?` keeps `set -e` from aborting the script on a non-zero
    # exit, so the status can be mapped to a precise error message below.
    python3 - "$default_config_file" "$user_config_file" "$install_target" <<'PY' || py_status=$?
import json
import pathlib
import sys

default_config = pathlib.Path(sys.argv[1])
user_config = pathlib.Path(sys.argv[2])
install_target = sys.argv[3]

defaults = json.loads(default_config.read_text(encoding="utf-8"))
default_codex_model = defaults.get("codex_model") or "gpt-5.5"

if user_config.exists():
    try:
        data = json.loads(user_config.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        print(f"malformed existing user config: {user_config}: {exc}", file=sys.stderr)
        sys.exit(2)
    if not isinstance(data, dict):
        print(f"existing user config is not a JSON object: {user_config}", file=sys.stderr)
        sys.exit(2)
else:
    data = {}

if not data.get("bitlesson_model"):
    data["bitlesson_model"] = data.get("codex_model") or default_codex_model

if install_target == "codex" and not data.get("provider_mode"):
    data["provider_mode"] = "codex-only"

user_config.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY
    case "$py_status" in
        0)
            log "ensured BitLesson uses a Codex/OpenAI model in $user_config_file"
            ;;
        2)
            die "failed to update $user_config_file because it is malformed; fix it manually and rerun install"
            ;;
        *)
            die "failed to update Humanize user config at $user_config_file"
            ;;
    esac
}

# Write the bitlesson-selector shim into COMMAND_BIN_DIR.
#
# Arguments:
#   $1  primary runtime root, tried first by the shim
#   $2  optional secondary runtime root, tried when the primary is missing
#
# The generated script execs the first executable
# <runtime-root>/scripts/bitlesson-select.sh it finds.
install_bitlesson_selector_shim() {
    local primary_runtime_root="$1"
    local secondary_runtime_root="${2:-}"
    local shim_path="$COMMAND_BIN_DIR/bitlesson-selector"
    # Declared local so the helper values do not leak into the installer's
    # global scope.
    local _escaped_primary
    local _escaped_secondary

    if [[ "$DRY_RUN" == "true" ]]; then
        log "DRY-RUN install bitlesson-selector shim into $shim_path"
        return
    fi

    mkdir -p "$COMMAND_BIN_DIR"

    # Escape paths for safe embedding in the generated script.
    # Use single-quoted strings so shell metacharacters in paths are inert.
    _escaped_primary=$(printf '%s' "$primary_runtime_root" | sed "s/'/'\\\\''/g")

    cat > "$shim_path" <<SHIM_EOF
#!/usr/bin/env bash
set -euo pipefail

candidate_paths=(
    '${_escaped_primary}/scripts/bitlesson-select.sh'
SHIM_EOF

    if [[ -n "$secondary_runtime_root" ]]; then
        _escaped_secondary=$(printf '%s' "$secondary_runtime_root" | sed "s/'/'\\\\''/g")
        cat >> "$shim_path" <<SHIM_EOF
    '${_escaped_secondary}/scripts/bitlesson-select.sh'
SHIM_EOF
    fi

    cat >> "$shim_path" <<'EOF'
)

for candidate in "${candidate_paths[@]}"; do
    if [[ -x "$candidate" ]]; then
        exec "$candidate" "$@"
    fi
done

echo "Error: Humanize bitlesson selector runtime not found. Re-run install-skill.sh." >&2
exit 1
EOF

    chmod +x "$shim_path"
    log "installed bitlesson-selector shim into: $shim_path"
}

# Install the skills and runtime bundle for the Kimi target.
install_kimi_target() {
    sync_target "kimi" "$KIMI_SKILLS_DIR"
}

# Install the skills and runtime bundle for the Codex target, then seed the
# user config and register the native Codex hooks.
install_codex_target() {
    sync_target "codex" "$CODEX_SKILLS_DIR"
    install_codex_user_config "$CODEX_SKILLS_DIR/humanize" "$TARGET"
    install_codex_native_hooks "$CODEX_SKILLS_DIR"
}
installs also update native hook/config state in: + $CODEX_CONFIG_DIR + No shell profile changes were made. +If $COMMAND_BIN_DIR is on PATH, the bitlesson-selector shim is now available there. EOF diff --git a/scripts/install-skills-codex.sh b/scripts/install-skills-codex.sh index 16b0375f..45d7476e 100755 --- a/scripts/install-skills-codex.sh +++ b/scripts/install-skills-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Convenience wrapper: install Humanize skills for Codex target. # diff --git a/scripts/install-skills-kimi.sh b/scripts/install-skills-kimi.sh index 7bcbb775..15a94dd4 100755 --- a/scripts/install-skills-kimi.sh +++ b/scripts/install-skills-kimi.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Convenience wrapper: install Humanize skills for Kimi target. # diff --git a/scripts/lib/model-router.sh b/scripts/lib/model-router.sh index ff12412f..d39e6f25 100644 --- a/scripts/lib/model-router.sh +++ b/scripts/lib/model-router.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # model-router.sh - Shared model routing helpers # diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..671a3100 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -1,8 +1,8 @@ -#!/bin/bash +#!/usr/bin/env bash # # monitor-common.sh - Shared utilities for humanize monitor functions # -# This file contains common functions used by both RLCR and PR loop monitors. +# This file contains common functions used by humanize monitor functions. # It should be sourced by humanize.sh rather than executed directly. 
# ======================================== @@ -130,7 +130,7 @@ monitor_restore_terminal() { monitor_get_status_color() { local status="$1" case "$status" in - active) echo "\033[1;32m" ;; # green + active|methodology-analysis) echo "\033[1;32m" ;; # green completed) echo "\033[1;36m" ;; # cyan failed|error|timeout) echo "\033[1;31m" ;; # red cancelled) echo "\033[1;33m" ;; # yellow @@ -159,7 +159,11 @@ monitor_find_state_file() { return fi - # Priority 1: state.md indicates active loop + # Priority 1: Active state files indicate running loop + if [[ -f "$session_dir/methodology-analysis-state.md" ]]; then + echo "$session_dir/methodology-analysis-state.md|methodology-analysis" + return + fi if [[ -f "$session_dir/state.md" ]]; then echo "$session_dir/state.md|active" return @@ -252,138 +256,44 @@ monitor_truncate_string() { } # ======================================== -# PR Loop Phase Detection +# Goal Tracker Parsing # ======================================== -# Detect current PR loop phase from file state -# Returns: one of: approved, cancelled, maxiter, codex_analyzing, waiting_initial_review, waiting_reviewer -# -# Usage: get_pr_loop_phase "/path/to/session" -# -# Detection strategy for codex_analyzing: -# 1. Find the latest round's pr-check.md file -# 2. Check if it's growing by comparing current size with cached previous size -# 3. Cache size in /tmp for comparison on next call -get_pr_loop_phase() { - local session_dir="$1" - - [[ ! 
-d "$session_dir" ]] && echo "unknown" && return - - # Check for final states first - [[ -f "$session_dir/approve-state.md" ]] && echo "approved" && return - [[ -f "$session_dir/cancel-state.md" ]] && echo "cancelled" && return - [[ -f "$session_dir/maxiter-state.md" ]] && echo "maxiter" && return - - # Check for Codex running by detecting file growth - # Find the highest numbered round pr-check file - local latest_check="" - local highest_round=-1 - while IFS= read -r f; do - [[ -z "$f" ]] && continue - local basename=$(basename "$f") - local round_str="${basename#round-}" - round_str="${round_str%-pr-check.md}" - if [[ "$round_str" =~ ^[0-9]+$ ]] && [[ "$round_str" -gt "$highest_round" ]]; then - highest_round="$round_str" - latest_check="$f" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-check.md' -type f 2>/dev/null) - - if [[ -n "$latest_check" ]]; then - # Get current file size - local current_size - current_size=$(stat -c%s "$latest_check" 2>/dev/null || stat -f%z "$latest_check" 2>/dev/null || echo 0) - - # Cache file for tracking size changes (unique per session) - local session_name=$(basename "$session_dir") - local cache_file="/tmp/humanize-phase-${session_name}-${highest_round}.size" - - # Read previous size from cache - local previous_size=0 - [[ -f "$cache_file" ]] && previous_size=$(cat "$cache_file" 2>/dev/null || echo 0) - - # Update cache with current size - echo "$current_size" > "$cache_file" 2>/dev/null || true - - # If file is growing OR is new (no previous record), Codex is analyzing - # Also check mtime as fallback (file modified in last 10 seconds) - local now_epoch file_epoch - now_epoch=$(date +%s) - file_epoch=$(stat -c %Y "$latest_check" 2>/dev/null || stat -f %m "$latest_check" 2>/dev/null || echo 0) - local age_seconds=$((now_epoch - file_epoch)) - - if [[ "$current_size" -gt "$previous_size" ]] || [[ "$age_seconds" -lt 10 ]]; then - echo "codex_analyzing" - return - fi +# Parse issue breakdown from goal-tracker.md 
# Returns: blocking_issues|queued_issues|open_issues
# Usage: parse_goal_tracker_issue_counts "/path/to/goal-tracker.md"
#
# Counts the data rows of the "Blocking Side Issues" and "Queued Side Issues"
# tables. When both are empty, the count of the "Open Issues" table is used
# instead and reported as blocking as well as open issues.
# A missing tracker file yields "0|0|0".
parse_goal_tracker_issue_counts() {
    local tracker_file="$1"
    local blocking_issues queued_issues open_issues

    if [[ ! -f "$tracker_file" ]]; then
        echo "0|0|0"
        return
    fi

    # Count the data rows of the markdown table between two heading patterns.
    # The first two table lines (header and separator) are not counted.
    _count_table_rows() {
        local start_pattern="$1"
        local end_pattern="$2"
        local row_count
        row_count=$(sed -n "/${start_pattern}/,/${end_pattern}/p" "$tracker_file" | grep -cE '^\|' || true)
        row_count=${row_count:-0}
        if ((row_count > 2)); then
            echo $((row_count - 2))
        else
            echo 0
        fi
    }

    blocking_issues=$(_count_table_rows '### Blocking Side Issues' '^###')
    queued_issues=$(_count_table_rows '### Queued Side Issues' '^###')
    open_issues=$((blocking_issues + queued_issues))

    # Fall back to the single "Open Issues" table when neither side-issue
    # table has any rows.
    if [[ "$open_issues" -eq 0 ]]; then
        open_issues=$(_count_table_rows '### Open Issues' '^###')
        blocking_issues="$open_issues"
    fi

    echo "${blocking_issues}|${queued_issues}|${open_issues}"
}
reviews..." - ;; - waiting_initial_review) - if [[ -n "$active_bots" && "$active_bots" != "none" ]]; then - echo "Waiting for initial PR review from $active_bots" - else - echo "Waiting for initial PR review" - fi - ;; - waiting_reviewer) - if [[ -n "$active_bots" && "$active_bots" != "none" ]]; then - echo "Waiting for $active_bots (polling...)" - else - echo "Waiting for reviews (polling...)" - fi - ;; - *) - echo "Unknown phase" - ;; - esac + echo "${blocking_issues}|${queued_issues}|${open_issues}" } -# ======================================== -# Goal Tracker Parsing -# ======================================== - # Parse goal-tracker.md and return summary values # Returns: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary # Usage: parse_goal_tracker "/path/to/goal-tracker.md" @@ -448,9 +358,19 @@ parse_goal_tracker() { local deferred_tasks deferred_tasks=$(_count_table_rows '### Explicitly Deferred' '^###') - # Count Open Issues + # Count Open Issues (new schema prefers Blocking/Queued Side Issues; old schema used Open Issues) + local issue_parts_raw local open_issues - open_issues=$(_count_table_rows '### Open Issues' '^###') + issue_parts_raw=$(parse_goal_tracker_issue_counts "$tracker_file") + if [[ -n "${ZSH_VERSION:-}" ]]; then + local -a issue_parts + issue_parts=("${(@s:|:)issue_parts_raw}") + open_issues="${issue_parts[3]}" + else + local -a issue_parts + IFS='|' read -r -a issue_parts <<< "$issue_parts_raw" + open_issues="${issue_parts[2]}" + fi # Extract Ultimate Goal summary local goal_summary @@ -462,37 +382,3 @@ parse_goal_tracker() { echo "${total_acs}|${completed_acs}|${active_tasks}|${completed_tasks}|${deferred_tasks}|${open_issues}|${goal_summary}" } -# Parse PR goal-tracker.md for issue statistics -# Returns: total_issues|resolved_issues|remaining_issues|last_reviewer -# Usage: humanize_parse_pr_goal_tracker "/path/to/goal-tracker.md" -humanize_parse_pr_goal_tracker() { - local tracker_file="$1" - if 
[[ ! -f "$tracker_file" ]]; then - echo "0|0|0|none" - return - fi - - # Extract from Total Statistics section - # Format: - Total Issues Found: N - local total_issues - total_issues=$(grep -E "^- Total Issues Found:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - total_issues=${total_issues:-0} - - local resolved_issues - resolved_issues=$(grep -E "^- Total Issues Resolved:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - resolved_issues=${resolved_issues:-0} - - local remaining_issues - remaining_issues=$(grep -E "^- Remaining:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - remaining_issues=${remaining_issues:-0} - - # Get last reviewer from Issue Summary table (last row, Reviewer column) - # Table format: | ID | Reviewer | Round | Status | Description | - # Pattern matches rows like "|1|..." or "| 1 |..." (with or without spaces) - local last_reviewer - last_reviewer=$(sed -n '/## Issue Summary/,/^##/p' "$tracker_file" \ - | grep -E '^\|[[:space:]]*[0-9]+' | tail -1 | cut -d'|' -f3 | tr -d ' ') - last_reviewer=${last_reviewer:-none} - - echo "${total_issues}|${resolved_issues}|${remaining_issues}|${last_reviewer}" -} diff --git a/scripts/lib/monitor-skill.sh b/scripts/lib/monitor-skill.sh index 359dc641..8803f139 100644 --- a/scripts/lib/monitor-skill.sh +++ b/scripts/lib/monitor-skill.sh @@ -1,17 +1,20 @@ -#!/bin/bash +#!/usr/bin/env bash # # monitor-skill.sh - Skill monitor for humanize # # Provides the _humanize_monitor_skill function for monitoring -# ask-codex skill invocations from .humanize/skill directory. +# skill invocations (ask-codex, ask-gemini) from .humanize/skill directory. # # This file is sourced by humanize.sh and depends on: # - monitor-common.sh (monitor_get_yaml_value, monitor_format_timestamp, etc.) 
# Report which tool produced a skill invocation.
#
# Arguments:
#   $1  invocation directory
# Prints the "tool" value from metadata.md when that file exists and the
# value is non-empty; otherwise the text following "- Tool: " in input.md;
# otherwise the literal "unknown".
_skill_get_tool() {
    local inv_dir="$1"
    local detected=""

    if [[ -f "$inv_dir/metadata.md" ]]; then
        # `local name=$(...)` is used deliberately so the helper's exit
        # status is masked exactly as before.
        local from_metadata=$(monitor_get_yaml_value "tool" "$inv_dir/metadata.md")
        detected="$from_metadata"
    fi

    if [[ -z "$detected" && -f "$inv_dir/input.md" ]]; then
        local from_input=$(grep -E '^- Tool:' "$inv_dir/input.md" 2>/dev/null | sed 's/- Tool: //')
        detected="$from_input"
    fi

    if [[ -n "$detected" ]]; then
        echo "$detected"
    else
        echo "unknown"
    fi
}
+ _skill_passes_filter() { + [[ -z "$tool_filter" ]] && return 0 + local t=$(_skill_get_tool "$1") + [[ "$t" == "$tool_filter" ]] && return 0 + # Legacy invocations without a tool tag are treated as codex + [[ "$t" == "unknown" && "$tool_filter" == "codex" ]] && return 0 + return 1 + } + # List all valid skill invocation directories sorted newest-first # Skill dirs use YYYY-MM-DD_HH-MM-SS or YYYY-MM-DD_HH-MM-SS-PID-RANDOM naming _skill_list_dirs_sorted() { @@ -47,7 +82,9 @@ _humanize_monitor_skill() { [[ -z "$d" ]] && continue [[ ! -d "$d" ]] && continue local name=$(basename "$d") - [[ "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] && dirs+=("$d") + [[ "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] || continue + _skill_passes_filter "$d" || continue + dirs+=("$d") done < <(find "$skill_dir" -mindepth 1 -maxdepth 1 -type d 2>/dev/null) printf '%s\n' "${dirs[@]}" | sort -r } @@ -88,6 +125,7 @@ _humanize_monitor_skill() { [[ ! -d "$d" ]] && continue local name=$(basename "$d") [[ ! "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] && continue + _skill_passes_filter "$d" || continue ((total++)) if [[ -f "$d/metadata.md" ]]; then local st=$(monitor_get_yaml_value "status" "$d/metadata.md") @@ -127,6 +165,7 @@ _humanize_monitor_skill() { # Find the best file to monitor for a skill invocation # Searches both global cache (~/.cache/humanize/), local cache ($dir/cache/), # and project-local files (.humanize/skill/) for the best content. + # Supports both codex (codex-run.*) and gemini (gemini-run.*) cache files. _skill_find_monitored_file() { local dir="$1" local gcache=$(_skill_find_cache_dir "$dir") @@ -134,18 +173,29 @@ _humanize_monitor_skill() { local is_running=false [[ ! 
-f "$dir/metadata.md" ]] && is_running=true + # Determine which tool produced this invocation for cache file naming + local inv_tool=$(_skill_get_tool "$dir") + local run_prefix="codex-run" + [[ "$inv_tool" == "gemini" ]] && run_prefix="gemini-run" + # Helper: check a cache directory for best file # Args: cache_dir, prefer_log (true for running, false for completed) _check_cache_files() { local c="$1" prefer_log="$2" [[ ! -d "$c" ]] && return if [[ "$prefer_log" == "true" ]]; then + [[ -f "$c/${run_prefix}.log" && -s "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + [[ -f "$c/${run_prefix}.out" && -s "$c/${run_prefix}.out" ]] && { echo "$c/${run_prefix}.out"; return; } + [[ -f "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + # Fallback: try the other prefix for legacy/mixed invocations [[ -f "$c/codex-run.log" && -s "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } - [[ -f "$c/codex-run.out" && -s "$c/codex-run.out" ]] && { echo "$c/codex-run.out"; return; } - [[ -f "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } + [[ -f "$c/gemini-run.log" && -s "$c/gemini-run.log" ]] && { echo "$c/gemini-run.log"; return; } else + [[ -f "$c/${run_prefix}.out" && -s "$c/${run_prefix}.out" ]] && { echo "$c/${run_prefix}.out"; return; } + [[ -f "$c/${run_prefix}.log" && -s "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + # Fallback [[ -f "$c/codex-run.out" && -s "$c/codex-run.out" ]] && { echo "$c/codex-run.out"; return; } - [[ -f "$c/codex-run.log" && -s "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } + [[ -f "$c/gemini-run.out" && -s "$c/gemini-run.out" ]] && { echo "$c/gemini-run.out"; return; } fi } @@ -166,6 +216,15 @@ _humanize_monitor_skill() { echo "" } + # Build the monitor title based on filter + _skill_monitor_title() { + case "$tool_filter" in + codex) echo " Humanize Skill Monitor [codex]" ;; + gemini) echo " Humanize Skill Monitor [gemini]" ;; + *) echo " 
Humanize Skill Monitor" ;; + esac + } + # Draw the status bar at the top _skill_draw_status_bar() { local latest_dir="$1" @@ -186,17 +245,21 @@ _humanize_monitor_skill() { # Parse latest invocation metadata local inv_status="running" model="N/A" effort="N/A" duration="N/A" started_at="N/A" + local inv_tool="unknown" if [[ -n "$latest_dir" && -f "$latest_dir/metadata.md" ]]; then inv_status=$(monitor_get_yaml_value "status" "$latest_dir/metadata.md") model=$(monitor_get_yaml_value "model" "$latest_dir/metadata.md") effort=$(monitor_get_yaml_value "effort" "$latest_dir/metadata.md") duration=$(monitor_get_yaml_value "duration" "$latest_dir/metadata.md") started_at=$(monitor_get_yaml_value "started_at" "$latest_dir/metadata.md") + inv_tool=$(monitor_get_yaml_value "tool" "$latest_dir/metadata.md") elif [[ -n "$latest_dir" && -f "$latest_dir/input.md" ]]; then model=$(grep -E '^- Model:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Model: //') effort=$(grep -E '^- Effort:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Effort: //') + inv_tool=$(grep -E '^- Tool:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Tool: //') fi inv_status="${inv_status:-unknown}"; model="${model:-N/A}"; effort="${effort:-N/A}" + inv_tool="${inv_tool:-unknown}" # Status color local status_color="$dim" @@ -235,11 +298,19 @@ _humanize_monitor_skill() { cache_display="...${cache_display: -$csuffix_len}" fi + # Model display: for gemini, no effort; for codex, show (effort) + local model_display="$model" + if [[ "$inv_tool" == "gemini" ]] || [[ "$effort" == "N/A" ]]; then + model_display="$model" + else + model_display="$model ($effort)" + fi + tput sc tput cup 0 0 # Line 1: Title - printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" " Humanize Skill Monitor" + printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" "$(_skill_monitor_title)" # Line 2: Aggregate stats printf "${cyan}Total:${reset} ${bold}${total}${reset} invocations" [[ "$success" -gt 0 ]] && printf " | ${green}${success} 
success${reset}" @@ -248,8 +319,8 @@ _humanize_monitor_skill() { [[ "$empty" -gt 0 ]] && printf " | ${yellow}${empty} empty${reset}" [[ "$running" -gt 0 ]] && printf " | ${yellow}${running} running${reset}" printf "${clr_eol}\n" - # Line 3: Focused invocation status + model + duration - printf "${magenta}Focused:${reset} ${status_color}%s${reset} | ${yellow}Model:${reset} %s (%s) | ${cyan}Duration:${reset} %s${clr_eol}\n" "$inv_status" "$model" "$effort" "${duration:-N/A}" + # Line 3: Focused invocation status + tool + model + duration + printf "${magenta}Focused:${reset} ${status_color}%s${reset} | ${dim}[%s]${reset} ${yellow}Model:${reset} %s | ${cyan}Duration:${reset} %s${clr_eol}\n" "$inv_status" "$inv_tool" "$model_display" "${duration:-N/A}" # Line 4: Started at printf "${cyan}Started:${reset} %s${clr_eol}\n" "$start_display" # Line 5: Question @@ -269,7 +340,9 @@ _humanize_monitor_skill() { if [[ "$once_mode" == "true" ]]; then local latest=$(_skill_find_latest_dir) if [[ -z "$latest" ]]; then - echo "No skill invocations found in $skill_dir" + local filter_msg="" + [[ -n "$tool_filter" ]] && filter_msg=" (filter: $tool_filter)" + echo "No skill invocations found in $skill_dir$filter_msg" return 1 fi @@ -283,24 +356,29 @@ _humanize_monitor_skill() { local -a stats humanize_split_to_array stats "$(_skill_count_stats)" local inv_status="running" model="N/A" effort="N/A" duration="N/A" started_at="N/A" + local inv_tool="unknown" if [[ -f "$focus_dir/metadata.md" ]]; then inv_status=$(monitor_get_yaml_value "status" "$focus_dir/metadata.md") model=$(monitor_get_yaml_value "model" "$focus_dir/metadata.md") effort=$(monitor_get_yaml_value "effort" "$focus_dir/metadata.md") duration=$(monitor_get_yaml_value "duration" "$focus_dir/metadata.md") started_at=$(monitor_get_yaml_value "started_at" "$focus_dir/metadata.md") + inv_tool=$(monitor_get_yaml_value "tool" "$focus_dir/metadata.md") fi + inv_tool="${inv_tool:-unknown}" local question=$(_skill_get_question 
"$focus_dir") local cache_dir=$(_skill_find_cache_dir "$focus_dir") + local title=$(_skill_monitor_title) echo "==========================================" - echo " Humanize Skill Monitor" + echo "$title" echo "==========================================" echo "" echo "Total Invocations: ${stats[0]}" echo " Success: ${stats[1]} Error: ${stats[2]} Timeout: ${stats[3]} Empty: ${stats[4]} Running: ${stats[5]}" echo "" echo "Focused: $(basename "$focus_dir")" + echo " Tool: ${inv_tool}" echo " Status: ${inv_status:-unknown}" echo " Model: ${model:-N/A} (${effort:-N/A})" echo " Duration: ${duration:-N/A}" @@ -329,14 +407,16 @@ _humanize_monitor_skill() { while IFS= read -r d; do [[ -z "$d" ]] && continue local name=$(basename "$d") - local st="running" dur="" + local st="running" dur="" t="?" if [[ -f "$d/metadata.md" ]]; then st=$(monitor_get_yaml_value "status" "$d/metadata.md") dur=$(monitor_get_yaml_value "duration" "$d/metadata.md") + t=$(monitor_get_yaml_value "tool" "$d/metadata.md") fi + t="${t:-?}" local q=$(_skill_get_question "$d") [[ ${#q} -gt 50 ]] && q="${q:0:47}..." - printf " %-38s %-14s %-6s %s\n" "$name" "$st" "$dur" "$q" + printf " %-38s %-7s %-14s %-6s %s\n" "$name" "[$t]" "$st" "$dur" "$q" ((count++)) [[ $count -ge 10 ]] && break done < <(_skill_list_dirs_sorted) diff --git a/scripts/poll-pr-reviews.sh b/scripts/poll-pr-reviews.sh deleted file mode 100755 index 4292dd2d..00000000 --- a/scripts/poll-pr-reviews.sh +++ /dev/null @@ -1,328 +0,0 @@ -#!/bin/bash -# -# Poll for new PR reviews from specified bots -# -# Checks for new comments from specified bots after a given timestamp. 
-# -# Usage: -# poll-pr-reviews.sh <pr_number> --after <timestamp> --bots <bot1,bot2> -# -# Output: JSON with new comments from the bots, or empty array if none -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -AFTER_TIMESTAMP="" -BOTS="" - -while [[ $# -gt 0 ]]; do - case $1 in - --after) - if [[ -z "${2:-}" ]]; then - echo "Error: --after requires a timestamp argument" >&2 - exit 1 - fi - AFTER_TIMESTAMP="$2" - shift 2 - ;; - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - BOTS="$2" - shift 2 - ;; - -h|--help) - cat << 'HELP_EOF' -poll-pr-reviews.sh - Poll for new PR reviews from bots - -USAGE: - poll-pr-reviews.sh <pr_number> --after <timestamp> --bots <bot1,bot2> - -ARGUMENTS: - <pr_number> The PR number to poll - -OPTIONS: - --after <timestamp> Only return comments after this ISO 8601 timestamp - --bots <bot1,bot2> Comma-separated list of bot names to watch - -h, --help Show this help message - -OUTPUT: - JSON object with: - - comments: Array of new comments from watched bots - - bots_responded: Array of bot names that have new comments - - has_new_comments: Boolean indicating if any new comments found - -EXAMPLE: - poll-pr-reviews.sh 123 --after 2026-01-18T12:00:00Z --bots claude,codex -HELP_EOF - exit 0 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac -done - -# Validate arguments -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - exit 1 -fi - -if [[ -z "$AFTER_TIMESTAMP" ]]; then - echo "Error: --after timestamp is required" >&2 - exit 1 -fi - -if [[ -z "$BOTS" ]]; then - echo "Error: --bots list is required" >&2 - exit 1 -fi - -if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number: $PR_NUMBER" >&2 - exit 1 -fi - -# ======================================== -# Check Prerequisites -# ======================================== - -if ! command -v gh &>/dev/null; then - echo "Error: GitHub CLI (gh) is required" >&2 - exit 1 -fi - -if ! command -v jq &>/dev/null; then - echo "Error: jq is required for JSON parsing" >&2 - exit 1 -fi - -# ======================================== -# Get Repository Info -# ======================================== - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, then try to get PR's base repo with --repo flag - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(gh repo view --json owner,name -q '.owner.login + "/" + .name' 2>/dev/null) || { - echo "Error: Failed to get current repository" >&2 - exit 1 -} - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" -else - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(gh repo view --json parent -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Error: Failed to find PR #$PR_NUMBER in current or parent repository" >&2 - exit 1 -fi - -REPO_OWNER="${PR_BASE_REPO%%/*}" -REPO_NAME="${PR_BASE_REPO##*/}" - -if [[ -z "$REPO_OWNER" || -z "$REPO_NAME" ]]; then - echo "Error: Could not parse 
repository owner/name from: $PR_BASE_REPO" >&2 - exit 1 -fi - -# ======================================== -# Build Bot Filter -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# Convert comma-separated bots to jq filter pattern -BOT_PATTERNS="" -IFS=',' read -ra BOT_ARRAY <<< "$BOTS" -for bot in "${BOT_ARRAY[@]}"; do - bot=$(echo "$bot" | tr -d ' ') - author=$(map_bot_to_author "$bot") - if [[ -n "$BOT_PATTERNS" ]]; then - BOT_PATTERNS="$BOT_PATTERNS|" - fi - # Escape brackets for regex - BOT_PATTERNS="${BOT_PATTERNS}${author//\[/\\[}" - BOT_PATTERNS="${BOT_PATTERNS//\]/\\]}" -done - -# ======================================== -# Fetch and Filter Comments -# ======================================== - -# Create temporary files -TEMP_DIR=$(mktemp -d) -trap 'rm -rf "$TEMP_DIR"' EXIT - -ALL_COMMENTS_FILE="$TEMP_DIR/all_comments.json" -FILTERED_FILE="$TEMP_DIR/filtered.json" - -# Retry configuration -MAX_RETRIES=3 -RETRY_DELAY=2 - -# Track API failures (for diagnostics, not script termination) -API_FAILURES=0 - -# Function to fetch with retries -# Returns 0 even on failure to prevent script termination under set -euo pipefail -# On failure, outputs empty array "[]" so jq processing continues gracefully -fetch_with_retry() { - local endpoint="$1" - local attempt=1 - local result="" - - while [[ $attempt -le $MAX_RETRIES ]]; do - result=$(gh api "$endpoint" --paginate 2>/dev/null) && { - echo "$result" - return 0 - } - - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Warning: API fetch failed (attempt $attempt/$MAX_RETRIES), retrying..." 
>&2 - sleep "$RETRY_DELAY" - else - echo "Warning: API fetch failed after $MAX_RETRIES attempts for $endpoint" >&2 - API_FAILURES=$((API_FAILURES + 1)) - fi - ((attempt++)) - done - - # Return empty array and success (0) to allow polling to continue - # Partial API outages shouldn't terminate the entire poll loop - echo "[]" - return 0 -} - -# Initialize empty array -echo "[]" > "$ALL_COMMENTS_FILE" - -# Fetch issue comments -ISSUE_COMMENTS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/issues/$PR_NUMBER/comments") -echo "$ISSUE_COMMENTS" | jq -r --arg type "issue_comment" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - body: .body - }] - else - [] - end -' > "$TEMP_DIR/issue.json" - -# Fetch review comments -REVIEW_COMMENTS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/comments") -echo "$REVIEW_COMMENTS" | jq -r --arg type "review_comment" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - body: .body, - path: .path, - line: (.line // .original_line) - }] - else - [] - end -' > "$TEMP_DIR/review.json" - -# Fetch PR reviews -# Note: Include all reviews, even those with empty body (e.g. 
approval-only reviews) -# For empty body reviews, use a placeholder indicating the state -PR_REVIEWS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/reviews") -echo "$PR_REVIEWS" | jq -r --arg type "pr_review" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .submitted_at, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end), - state: .state - }] - else - [] - end -' > "$TEMP_DIR/reviews.json" - -# Combine all comments -jq -s 'add' "$TEMP_DIR/issue.json" "$TEMP_DIR/review.json" "$TEMP_DIR/reviews.json" > "$ALL_COMMENTS_FILE" - -# Filter: after timestamp AND from watched bots -jq --arg after "$AFTER_TIMESTAMP" --arg pattern "$BOT_PATTERNS" ' - [.[] | select( - .created_at >= $after and - (.author | test($pattern; "i")) - )] -' "$ALL_COMMENTS_FILE" > "$FILTERED_FILE" - -# ======================================== -# Build Output -# ======================================== - -COMMENT_COUNT=$(jq 'length' "$FILTERED_FILE") - -# Get list of bots that responded -BOTS_RESPONDED=$(jq -r '[.[] | .author] | unique | join(",")' "$FILTERED_FILE") - -# Build final output -jq -n \ - --argjson comments "$(cat "$FILTERED_FILE")" \ - --arg bots_responded "$BOTS_RESPONDED" \ - --argjson has_new $(if [[ "$COMMENT_COUNT" -gt 0 ]]; then echo "true"; else echo "false"; fi) \ - '{ - comments: $comments, - bots_responded: ($bots_responded | split(",") | map(select(length > 0))), - has_new_comments: $has_new, - comment_count: ($comments | length) - }' - -exit 0 diff --git a/scripts/portable-timeout.sh b/scripts/portable-timeout.sh index 318ef72e..2dcd9308 100755 --- a/scripts/portable-timeout.sh +++ b/scripts/portable-timeout.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Portable timeout wrapper for macOS/Linux compatibility # Usage: source portable-timeout.sh; run_with_timeout <seconds> <command> [args...] 
diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 306f875c..a4cd6329 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Run RLCR stop-hook logic from non-hook environments (e.g. skill workflows). # @@ -18,12 +18,19 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" HUMANIZE_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + +# Deterministic project-root resolver (CLAUDE_PROJECT_DIR -> git toplevel, no pwd fallback). +# Overridable via --project-root for non-hook callers; the flag handler below +# always wins because it runs after this default assignment. +source "$HUMANIZE_ROOT/hooks/lib/project-root.sh" +PROJECT_ROOT="$(resolve_project_root 2>/dev/null || true)" HOOK_SCRIPT="$HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh" SESSION_ID="${CLAUDE_SESSION_ID:-}" TRANSCRIPT_PATH="${CLAUDE_TRANSCRIPT_PATH:-}" PRINT_JSON="false" +HOOK_MODEL="${CODEX_MODEL:-humanize-skill-gate}" +HOOK_PERMISSION_MODE="${CODEX_PERMISSION_MODE:-default}" usage() { cat <<'EOF' @@ -71,6 +78,14 @@ while [[ $# -gt 0 ]]; do esac done +if [[ -z "$PROJECT_ROOT" ]]; then + # No humanize project context reachable from here -- nothing to enforce. + # Allow the stop to proceed instead of returning a wrapper error so that + # invoking the gate outside any project (or any git repo) is benign. + echo "ALLOW: no humanize project root resolved." + exit 0 +fi + if [[ ! -x "$HOOK_SCRIPT" ]]; then echo "Error: Hook script not found or not executable: $HOOK_SCRIPT" >&2 exit 20 @@ -81,19 +96,30 @@ if ! command -v jq >/dev/null 2>&1; then exit 20 fi -# Build hook input JSON while omitting empty fields. -# Include standard Stop hook fields so the underlying hook sees the same schema -# as a real Claude Code Stop event (hook_event_name, stop_hook_active, cwd). +# Build hook input JSON. 
Include standard Stop hook fields so the underlying +# hook sees the same schema as a real Claude Code Stop event +# (hook_event_name, stop_hook_active, cwd). +# +# Empty session_id / transcript_path become explicit null instead of being +# filtered out; a `select(length > 0)` used as a plain object value collapses +# the entire enclosing object to empty whenever any selected field is empty, +# which would hide forwarded fields like transcript_path when only session_id +# is missing. HOOK_INPUT=$(jq -n \ --arg session_id "$SESSION_ID" \ --arg transcript_path "$TRANSCRIPT_PATH" \ --arg cwd "$PROJECT_ROOT" \ + --arg model "$HOOK_MODEL" \ + --arg permission_mode "$HOOK_PERMISSION_MODE" \ '{ hook_event_name: "Stop", stop_hook_active: false, cwd: $cwd, - session_id: ($session_id | select(length > 0)), - transcript_path: ($transcript_path | select(length > 0)) + model: $model, + permission_mode: $permission_mode, + last_assistant_message: null, + session_id: (if ($session_id | length) > 0 then $session_id else null end), + transcript_path: (if ($transcript_path | length) > 0 then $transcript_path else null end) }') # Capture hook exit code explicitly to map non-zero to exit 20 (wrapper error) @@ -133,6 +159,20 @@ if [[ "$DECISION" == "block" ]]; then exit 10 fi +# No decision field in the JSON: per Claude Code Stop-hook spec this means +# allow the stop. Surface any systemMessage so callers see the reason +# (e.g. "background task(s) still running"), then exit 0. +if [[ -z "$DECISION" ]]; then + if [[ "$PRINT_JSON" == "true" ]]; then + printf '%s\n' "$HOOK_OUTPUT" + elif [[ -n "$SYSTEM_MESSAGE" ]]; then + printf 'ALLOW: %s\n' "$SYSTEM_MESSAGE" + else + echo "ALLOW: stop gate passed." 
+ fi + exit 0 +fi + echo "Error: Unexpected hook decision: ${DECISION:-<empty>}" >&2 printf '%s\n' "$HOOK_OUTPUT" >&2 exit 20 diff --git a/scripts/setup-pr-loop.sh b/scripts/setup-pr-loop.sh deleted file mode 100755 index fabf36e3..00000000 --- a/scripts/setup-pr-loop.sh +++ /dev/null @@ -1,945 +0,0 @@ -#!/bin/bash -# -# Setup script for start-pr-loop -# -# Creates state files for the PR loop that monitors GitHub PR reviews from bots. -# -# Usage: -# setup-pr-loop.sh --claude|--codex [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] -# - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Override effort before sourcing loop-common.sh (PR loop defaults to medium effort). -# codex_model is NOT pre-set here so that config-backed values from loop-common.sh apply. -DEFAULT_CODEX_EFFORT="medium" -DEFAULT_CODEX_TIMEOUT=900 -DEFAULT_MAX_ITERATIONS=42 - -# Polling configuration -POLL_INTERVAL=30 -POLL_TIMEOUT=900 # 15 minutes per bot - -# Default timeout for git operations (30 seconds) -GIT_TIMEOUT=30 - -# Default timeout for GitHub CLI operations (60 seconds) -GH_TIMEOUT=60 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# Source template loader and shared loop library (provides DEFAULT_CODEX_MODEL and other constants) -HOOKS_LIB_DIR="$(cd "$SCRIPT_DIR/../hooks/lib" && pwd)" -source "$HOOKS_LIB_DIR/template-loader.sh" -source "$HOOKS_LIB_DIR/loop-common.sh" - -# Initialize template directory -TEMPLATE_DIR="${TEMPLATE_DIR:-$(get_template_dir "$HOOKS_LIB_DIR")}" - -# ======================================== -# Parse Arguments -# ======================================== - -MAX_ITERATIONS="$DEFAULT_MAX_ITERATIONS" -CODEX_MODEL="$DEFAULT_CODEX_MODEL" -CODEX_EFFORT="$DEFAULT_CODEX_EFFORT" -CODEX_TIMEOUT="$DEFAULT_CODEX_TIMEOUT" - -# Bot flags -BOT_CLAUDE="false" -BOT_CODEX="false" 
- -show_help() { - cat << 'HELP_EOF' -start-pr-loop - PR review loop with remote bot monitoring - -USAGE: - /humanize:start-pr-loop --claude|--codex [OPTIONS] - -BOT FLAGS (at least one required): - --claude Monitor reviews from claude[bot] (trigger: @claude) - --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger: @codex) - -OPTIONS: - --max <N> Maximum iterations before auto-stop (default: 42) - --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, effort: medium) - --codex-timeout <SECONDS> - Timeout for each Codex review in seconds (default: 900) - -h, --help Show this help message - -DESCRIPTION: - Starts a PR review loop that: - - 1. Detects the PR associated with the current branch - 2. Fetches review comments from the specified bot(s) - 3. Analyzes and fixes issues identified by the bot(s) - 4. Pushes changes and triggers re-review by commenting @bot - 5. Waits for bot response (polls every 30s, 15min timeout) - 6. Uses local Codex to verify if remote concerns are valid - - The flow: - 1. Claude analyzes PR comments and fixes issues - 2. Claude pushes changes and comments @bot on PR - 3. Stop Hook polls for new bot reviews - 4. When reviews arrive, local Codex validates them - 5. If issues found, Claude continues fixing - 6. If all bots approve, loop ends - -EXAMPLES: - /humanize:start-pr-loop --claude - /humanize:start-pr-loop --codex --max 20 - /humanize:start-pr-loop --claude --codex - -STOPPING: - - /humanize:cancel-pr-loop Cancel the active PR loop - - Reach --max iterations - - All bots approve the changes - -MONITORING: - humanize monitor pr -HELP_EOF - exit 0 -} - -while [[ $# -gt 0 ]]; do - case $1 in - -h|--help) - show_help - ;; - --claude) - BOT_CLAUDE="true" - shift - ;; - --codex) - BOT_CODEX="true" - shift - ;; - --max) - if [[ -z "${2:-}" ]]; then - echo "Error: --max requires a number argument" >&2 - exit 1 - fi - if ! 
[[ "$2" =~ ^[0-9]+$ ]]; then - echo "Error: --max must be a positive integer, got: $2" >&2 - exit 1 - fi - MAX_ITERATIONS="$2" - shift 2 - ;; - --codex-model) - if [[ -z "${2:-}" ]]; then - echo "Error: --codex-model requires a MODEL:EFFORT argument" >&2 - exit 1 - fi - # Parse MODEL:EFFORT format (portable - works in bash and zsh) - if [[ "$2" == *:* ]]; then - CODEX_MODEL="${2%%:*}" - CODEX_EFFORT="${2#*:}" - else - CODEX_MODEL="$2" - CODEX_EFFORT="$DEFAULT_CODEX_EFFORT" - fi - shift 2 - ;; - --codex-timeout) - if [[ -z "${2:-}" ]]; then - echo "Error: --codex-timeout requires a number argument (seconds)" >&2 - exit 1 - fi - if ! [[ "$2" =~ ^[0-9]+$ ]]; then - echo "Error: --codex-timeout must be a positive integer (seconds), got: $2" >&2 - exit 1 - fi - CODEX_TIMEOUT="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - echo "Use --help for usage information" >&2 - exit 1 - ;; - *) - echo "Error: Unexpected argument: $1" >&2 - echo "Use --help for usage information" >&2 - exit 1 - ;; - esac -done - -# ======================================== -# Validate Bot Flags -# ======================================== - -if [[ "$BOT_CLAUDE" != "true" && "$BOT_CODEX" != "true" ]]; then - echo "Error: At least one bot flag is required" >&2 - echo "" >&2 - echo "Usage: /humanize:start-pr-loop --claude|--codex [OPTIONS]" >&2 - echo "" >&2 - echo "Bot flags:" >&2 - echo " --claude Monitor reviews from claude[bot] (trigger: @claude)" >&2 - echo " --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger: @codex)" >&2 - echo "" >&2 - echo "For help: /humanize:start-pr-loop --help" >&2 - exit 1 -fi - -# Build active_bots list (stored as array for YAML list format) -# Bot names stored in state: claude, codex -# Trigger mentions: @claude, @codex -# Comment authors: claude[bot], chatgpt-codex-connector[bot] -declare -a ACTIVE_BOTS_ARRAY=() -if [[ "$BOT_CLAUDE" == "true" ]]; then - ACTIVE_BOTS_ARRAY+=("claude") -fi -if [[ "$BOT_CODEX" == "true" ]]; then - 
ACTIVE_BOTS_ARRAY+=("codex") -fi - -# ======================================== -# Validate Prerequisites -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" - -# loop-common.sh already sourced above (provides find_active_loop, find_active_pr_loop, etc.) - -# Build dynamic mention string from active bots (using shared helper) -BOT_MENTION_STRING=$(build_bot_mention_string "${ACTIVE_BOTS_ARRAY[@]}") - -# ======================================== -# Mutual Exclusion Check -# ======================================== - -# Check for existing active loops (both RLCR and PR loops) -# Only one loop type can be active at a time -RLCR_LOOP_DIR=$(find_active_loop "$PROJECT_ROOT/.humanize/rlcr" 2>/dev/null || echo "") -PR_LOOP_DIR=$(find_active_pr_loop "$PROJECT_ROOT/.humanize/pr-loop" 2>/dev/null || echo "") - -if [[ -n "$RLCR_LOOP_DIR" ]]; then - echo "Error: An RLCR loop is already active" >&2 - echo " Active loop: $RLCR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the RLCR loop first with: /humanize:cancel-rlcr-loop" >&2 - exit 1 -fi - -if [[ -n "$PR_LOOP_DIR" ]]; then - echo "Error: A PR loop is already active" >&2 - echo " Active loop: $PR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the PR loop first with: /humanize:cancel-pr-loop" >&2 - exit 1 -fi - -# Check git repo (with timeout) -if ! run_with_timeout "$GIT_TIMEOUT" git rev-parse --git-dir &>/dev/null; then - echo "Error: Project must be a git repository (or git command timed out)" >&2 - exit 1 -fi - -# Check at least one commit (with timeout) -if ! run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD &>/dev/null 2>&1; then - echo "Error: Git repository must have at least one commit (or git command timed out)" >&2 - exit 1 -fi - -# Check gh CLI is installed -if ! 
command -v gh &>/dev/null; then - echo "Error: start-pr-loop requires the GitHub CLI (gh) to be installed" >&2 - echo "" >&2 - echo "Please install the GitHub CLI: https://cli.github.com/" >&2 - exit 1 -fi - -# Check gh CLI is authenticated -if ! gh auth status &>/dev/null 2>&1; then - echo "Error: GitHub CLI is not authenticated" >&2 - echo "" >&2 - echo "Please run: gh auth login" >&2 - exit 1 -fi - -# Check codex is available -if ! command -v codex &>/dev/null; then - echo "Error: start-pr-loop requires codex to run" >&2 - echo "" >&2 - echo "Please install Codex CLI: https://openai.com/codex" >&2 - exit 1 -fi - -# ======================================== -# Detect PR -# ======================================== - -START_BRANCH=$(run_with_timeout "$GIT_TIMEOUT" git -C "$PROJECT_ROOT" rev-parse --abbrev-ref HEAD) -if [[ -z "$START_BRANCH" ]]; then - echo "Error: Failed to get current branch (git command timed out or failed)" >&2 - exit 1 -fi - -# ======================================== -# Resolve Repository Context (for fork PR support) -# ======================================== -# IMPORTANT: For fork PRs, the PR lives in the upstream (parent) repo, not the fork. -# We must resolve the correct repo BEFORE attempting to get PR number/state. 
- -# Step 1: Get current repo -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Check if current repo is a fork and get parent repo -PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - -# Step 3: Determine which repo to use for PR lookups -# Try current repo first, then parent (for fork case) -PR_LOOKUP_REPO="" -PR_NUMBER="" - -# Try to find PR using gh's auto-detection (no --repo flag) -# This handles cases where local branch name differs from PR head (e.g., renamed branch) -# IMPORTANT: gh pr view can auto-resolve to upstream repo when in a fork, so we must -# extract the actual repo from the PR URL rather than assuming it's CURRENT_REPO -PR_INFO=$(run_with_timeout "$GH_TIMEOUT" gh pr view --json number,url -q '.number,.url' 2>/dev/null) || PR_INFO="" -if [[ -n "$PR_INFO" ]]; then - # Parse number and URL from newline-separated output (jq outputs each field on separate line) - PR_NUMBER=$(echo "$PR_INFO" | head -1) - PR_URL=$(echo "$PR_INFO" | tail -1) - # Validate PR_NUMBER is numeric - if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number from gh CLI: $PR_INFO" >&2 - PR_NUMBER="" - PR_URL="" - else - # Extract repo from URL: https://HOST/OWNER/REPO/pull/NUMBER -> OWNER/REPO - # Works with github.com and GitHub Enterprise (any host) - if [[ "$PR_URL" =~ https?://[^/]+/([^/]+/[^/]+)/pull/ ]]; then - PR_LOOKUP_REPO="${BASH_REMATCH[1]}" - else - # Fallback to current repo if URL parsing fails - PR_LOOKUP_REPO="$CURRENT_REPO" - fi - fi -fi - -# If not found in current repo and we have a parent (fork case), try parent -# IMPORTANT: For fork PRs, the head branch lives in the fork, so we must use -# the fork-qualified format (FORK_OWNER:BRANCH) when looking up in parent repo -if [[ -z "$PR_NUMBER" && -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - echo "Checking parent repo for PR (fork detected)..." >&2 - # Extract fork owner from CURRENT_REPO (format: owner/repo) - FORK_OWNER="${CURRENT_REPO%%/*}" - # Use fork-qualified branch name: FORK_OWNER:BRANCH - QUALIFIED_BRANCH="${FORK_OWNER}:${START_BRANCH}" - echo " Using qualified branch: $QUALIFIED_BRANCH" >&2 - PR_NUMBER=$(run_with_timeout "$GH_TIMEOUT" gh pr view --repo "$PARENT_REPO" "$QUALIFIED_BRANCH" --json number -q .number 2>/dev/null) || PR_NUMBER="" - if [[ -n "$PR_NUMBER" ]]; then - PR_LOOKUP_REPO="$PARENT_REPO" - echo "Found PR #$PR_NUMBER in parent repo: $PARENT_REPO" >&2 - fi -fi - -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: No pull request found for branch '$START_BRANCH'" >&2 - echo "" >&2 - echo "Please create a pull request first:" >&2 - echo " gh pr create" >&2 - exit 1 -fi - -# Validate PR_NUMBER is numeric -if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number from gh CLI: $PR_NUMBER" >&2 - exit 1 -fi - -# Get PR state (using resolved repo for fork support) -PR_STATE=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json state -q .state 2>/dev/null) || PR_STATE="" -if [[ "$PR_STATE" == "MERGED" ]]; then - echo "Error: PR #$PR_NUMBER has already been merged" >&2 - exit 1 -fi -if [[ "$PR_STATE" == "CLOSED" ]]; then - echo "Error: PR #$PR_NUMBER has been closed" >&2 - exit 1 -fi - -# IMPORTANT: Use the PR's lookup repository for API calls -# Since PR_LOOKUP_REPO was already validated to contain this PR, we can use it directly -PR_BASE_REPO="$PR_LOOKUP_REPO" - -# ======================================== -# Validate YAML Safety -# ======================================== - -# Validate branch name for YAML safety (prevents injection in state.md) -if [[ "$START_BRANCH" == *[:\#\"\'\`]* ]] || [[ "$START_BRANCH" =~ $'\n' ]]; then - echo "Error: Branch name contains YAML-unsafe characters" >&2 - echo " Branch: $START_BRANCH" >&2 - echo " Characters not allowed: : # \" ' \` newline" >&2 - echo " Please checkout a branch with a simpler name" >&2 - exit 1 -fi - -# Validate codex model for YAML safety -if [[ ! "$CODEX_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then - echo "Error: Codex model contains invalid characters" >&2 - echo " Model: $CODEX_MODEL" >&2 - echo " Only alphanumeric, hyphen, underscore, dot allowed" >&2 - exit 1 -fi - -# Validate codex effort for YAML safety -if [[ ! 
"$CODEX_EFFORT" =~ ^[a-zA-Z0-9_-]+$ ]]; then - echo "Error: Codex effort contains invalid characters" >&2 - echo " Effort: $CODEX_EFFORT" >&2 - echo " Only alphanumeric, hyphen, underscore allowed" >&2 - exit 1 -fi - -# ======================================== -# Setup State Directory -# ======================================== - -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Create timestamp for this loop session -TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) -LOOP_DIR="$LOOP_BASE_DIR/$TIMESTAMP" - -mkdir -p "$LOOP_DIR" - -# ======================================== -# Fetch Initial Comments -# ======================================== - -COMMENT_FILE="$LOOP_DIR/round-0-pr-comment.md" - -# Build comma-separated bot list for fetch script -BOTS_COMMA_LIST=$(IFS=','; echo "${ACTIVE_BOTS_ARRAY[*]}") - -# Call fetch-pr-comments.sh to get all comments, grouped by active bots -"$SCRIPT_DIR/fetch-pr-comments.sh" "$PR_NUMBER" "$COMMENT_FILE" --bots "$BOTS_COMMA_LIST" - -# ======================================== -# Determine Startup Case -# ======================================== - -# Call check-pr-reviewer-status.sh to analyze PR state -REVIEWER_STATUS=$("$SCRIPT_DIR/check-pr-reviewer-status.sh" "$PR_NUMBER" --bots "$BOTS_COMMA_LIST" 2>/dev/null) || { - echo "Warning: Failed to check reviewer status, defaulting to Case 1" >&2 - REVIEWER_STATUS='{"case":1,"reviewers_commented":[],"reviewers_missing":[],"latest_commit_sha":"","latest_commit_at":"","newest_review_at":null,"has_commits_after_reviews":false}' -} - -# Parse reviewer status JSON -STARTUP_CASE=$(echo "$REVIEWER_STATUS" | jq -r '.case') -LATEST_COMMIT_SHA=$(echo "$REVIEWER_STATUS" | jq -r '.latest_commit_sha') -LATEST_COMMIT_AT=$(echo "$REVIEWER_STATUS" | jq -r '.latest_commit_at') -HAS_COMMITS_AFTER=$(echo "$REVIEWER_STATUS" | jq -r '.has_commits_after_reviews') - -# Fallback to git HEAD if API didn't return commit SHA -if [[ -z "$LATEST_COMMIT_SHA" ]] || [[ "$LATEST_COMMIT_SHA" == "null" ]]; then - 
LATEST_COMMIT_SHA=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD) -fi - -echo "Startup Case: $STARTUP_CASE" >&2 -echo "Latest Commit: $LATEST_COMMIT_SHA" >&2 - -# Handle Case 4/5: All reviewers commented but new commits exist -# Need to trigger re-review by posting @bot comment -LAST_TRIGGER_AT="" -TRIGGER_COMMENT_ID="" - -if [[ "$STARTUP_CASE" -eq 4 ]] || [[ "$STARTUP_CASE" -eq 5 ]]; then - # First, check if there's already a pending @mention after the latest commit - # This avoids duplicate @mention spam when user has already requested re-review - echo "Case $STARTUP_CASE: Checking for existing trigger comment after latest commit..." >&2 - - # Build regex patterns for bot mentions with word boundary anchoring - # Pattern: (start|non-username-char) + @botname + (end|non-username-char) - # Prevents false matches like @claude-dev or support@codex.io - MENTION_PATTERNS_JSON=$(printf '%s\n' "${ACTIVE_BOTS_ARRAY[@]}" | jq -R '"(^|[^a-zA-Z0-9_-])@" + . + "($|[^a-zA-Z0-9_-])"' | jq -s '.') - - # Find existing trigger comment that mentions ALL active bots after latest commit - # Notes: - # - Uses PR_BASE_REPO for fork PR support - # - Uses jq -s to aggregate paginated results before filtering - # - Reuse only when ALL bots are mentioned (partial mentions need new trigger) - # - Strips code blocks/inline code/quotes since GitHub ignores mentions there - if [[ -n "$LATEST_COMMIT_AT" && "$LATEST_COMMIT_AT" != "null" ]]; then - EXISTING_TRIGGER=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate 2>/dev/null \ - | jq -s --arg since "$LATEST_COMMIT_AT" --argjson patterns "$MENTION_PATTERNS_JSON" ' - # Strip content between delimiters, keeping even-indexed parts (outside delimiters) - # Used for fenced code blocks where regex fails on nested backticks - def strip_between(delim): [splits(delim)] | to_entries | map(select(.key % 2 == 0) | .value) | join(" "); - - # Strip code blocks, inline code, and quoted lines (GitHub 
ignores mentions in these) - def strip_non_mention_contexts: - strip_between("```") # fenced code blocks - | strip_between("~~~") # tilde fenced code blocks - | gsub("`[^`]*`"; " ") # inline code - | gsub("(^|\\n)( |\\t)[^\\n]*"; " ") # indented code blocks (4+ spaces or tab) - | gsub("(^|\\n)\\s*>[^\\n]*"; " "); # quoted lines (> prefix) - - [.[][] | select(.created_at > $since and ( - # Check that ALL patterns are present in the stripped body - # Use case-insensitive matching since GitHub mentions are case-insensitive - (.body | strip_non_mention_contexts) as $clean_body - | $patterns | all(. as $p | $clean_body | test($p; "i")) - ))] - | sort_by(.created_at) - | last - | {id: .id, created_at: .created_at} - ') || EXISTING_TRIGGER="" - else - EXISTING_TRIGGER="" - fi - - # Extract fields once to avoid repeated jq calls - # Skip jq parsing if EXISTING_TRIGGER is empty (API failure fallback) - if [[ -n "$EXISTING_TRIGGER" ]]; then - TRIGGER_COMMENT_ID=$(echo "$EXISTING_TRIGGER" | jq -r '.id // empty' 2>/dev/null) || TRIGGER_COMMENT_ID="" - LAST_TRIGGER_AT=$(echo "$EXISTING_TRIGGER" | jq -r '.created_at // empty' 2>/dev/null) || LAST_TRIGGER_AT="" - else - TRIGGER_COMMENT_ID="" - LAST_TRIGGER_AT="" - fi - - if [[ -n "$TRIGGER_COMMENT_ID" ]]; then - # Found existing @mention - reuse it instead of posting new one - echo "Found existing trigger comment (ID: $TRIGGER_COMMENT_ID), skipping duplicate @mention" >&2 - else - # No existing @mention - post new trigger - echo "No existing trigger found, posting trigger comment for re-review..." 
>&2 - - # Post trigger comment (abort on failure to prevent orphaned state) - # NOTE: Uses --repo for fork PR support (comments go to base repo, not fork) - TRIGGER_BODY="$BOT_MENTION_STRING please review the latest changes (new commits since last review)" - TRIGGER_RESULT=$(run_with_timeout "$GH_TIMEOUT" gh pr comment "$PR_NUMBER" --repo "$PR_BASE_REPO" --body "$TRIGGER_BODY" 2>&1) || { - echo "Error: Failed to post trigger comment: $TRIGGER_RESULT" >&2 - echo "" >&2 - echo "Cannot proceed without a trigger comment - bots would not be notified." >&2 - echo "Please check:" >&2 - echo " - GitHub API rate limits" >&2 - echo " - Network connectivity" >&2 - echo " - Repository permissions" >&2 - rm -rf "$LOOP_DIR" - exit 1 - } - - # Get the comment ID and use GitHub's timestamp to avoid clock skew - # Fetch the latest comment from current user - CURRENT_USER=$(run_with_timeout "$GH_TIMEOUT" gh api user --jq '.login' 2>/dev/null) || CURRENT_USER="" - if [[ -n "$CURRENT_USER" ]]; then - # Fetch both ID and created_at from the comment we just posted - # IMPORTANT: --jq with --paginate runs per-page, so aggregate first then filter - # IMPORTANT: Use PR_BASE_REPO for fork PR support - COMMENT_DATA=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq ".[] | select(.user.login == \"$CURRENT_USER\") | {id: .id, created_at: .created_at}" 2>/dev/null \ - | jq -s 'sort_by(.created_at) | reverse | .[0]') || COMMENT_DATA="" - - if [[ -n "$COMMENT_DATA" && "$COMMENT_DATA" != "null" ]]; then - TRIGGER_COMMENT_ID=$(echo "$COMMENT_DATA" | jq -r '.id // empty') - # Use GitHub's timestamp instead of local time to avoid clock skew - LAST_TRIGGER_AT=$(echo "$COMMENT_DATA" | jq -r '.created_at // empty') - fi - fi - - # NOTE: Do NOT fall back to local time if GitHub timestamp fetch failed. - # Local clock skew could set a future timestamp, causing stop hook to filter - # out all comments. 
The stop hook has its own trigger detection logic that - # will find the trigger comment if LAST_TRIGGER_AT is empty. - fi - - # If --claude is specified, verify eyes reaction (MANDATORY per plan) - if [[ "$BOT_CLAUDE" == "true" ]]; then - echo "Verifying Claude eyes reaction (3 attempts x 5 seconds)..." >&2 - - if [[ -z "$TRIGGER_COMMENT_ID" ]]; then - # Fail if trigger comment ID not found (can't verify eyes without it) - echo "Error: Could not find trigger comment ID for eyes verification" >&2 - echo "" >&2 - echo "The trigger comment was posted but its ID could not be retrieved." >&2 - echo "This prevents verification of Claude's eyes reaction." >&2 - echo "" >&2 - echo "Please try:" >&2 - echo " 1. Wait a moment and try again" >&2 - echo " 2. Check GitHub rate limits" >&2 - echo " 3. Verify the comment was posted successfully" >&2 - - # Clean up the loop directory since we're failing - rm -rf "$LOOP_DIR" - exit 1 - fi - - # Check for eyes reaction with retry - # Pass --pr for fork PR support (reactions are on base repo) - if ! "$SCRIPT_DIR/check-bot-reactions.sh" claude-eyes "$TRIGGER_COMMENT_ID" --pr "$PR_NUMBER" --retry 3 --delay 5 >/dev/null 2>&1; then - echo "Error: Claude bot did not respond with eyes reaction" >&2 - echo "" >&2 - echo "This may indicate:" >&2 - echo " - Claude bot is not configured on this repository" >&2 - echo " - Network issues preventing Claude from seeing the mention" >&2 - echo "" >&2 - echo "Please verify Claude bot is set up correctly on this repository." >&2 - - # Clean up the loop directory since we're failing - rm -rf "$LOOP_DIR" - exit 1 - fi - echo "Claude eyes reaction confirmed!" 
>&2 - fi -fi - -# ======================================== -# Create State File -# ======================================== - -# Build YAML list for active_bots and configured_bots (using shared helper) -ACTIVE_BOTS_YAML=$(build_yaml_list "${ACTIVE_BOTS_ARRAY[@]}") - -# configured_bots is identical to active_bots at start, but never changes -# This allows re-polling previously approved bots if they post new issues -CONFIGURED_BOTS_YAML="$ACTIVE_BOTS_YAML" - -cat > "$LOOP_DIR/state.md" << EOF ---- -current_round: 0 -max_iterations: $MAX_ITERATIONS -pr_number: $PR_NUMBER -start_branch: $START_BRANCH -configured_bots:${CONFIGURED_BOTS_YAML} -active_bots:${ACTIVE_BOTS_YAML} -codex_model: $CODEX_MODEL -codex_effort: $CODEX_EFFORT -codex_timeout: $CODEX_TIMEOUT -poll_interval: $POLL_INTERVAL -poll_timeout: $POLL_TIMEOUT -started_at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -startup_case: $STARTUP_CASE -latest_commit_sha: $LATEST_COMMIT_SHA -latest_commit_at: ${LATEST_COMMIT_AT:-} -last_trigger_at: ${LAST_TRIGGER_AT:-} -trigger_comment_id: ${TRIGGER_COMMENT_ID:-} ---- -EOF - -# ======================================== -# Create Goal Tracker -# ======================================== - -GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" - -# Build display string for active bots -ACTIVE_BOTS_DISPLAY=$(IFS=', '; echo "${ACTIVE_BOTS_ARRAY[*]}") - -# Build acceptance criteria rows for each bot -BOT_AC_ROWS="" -AC_NUM=1 -for bot in "${ACTIVE_BOTS_ARRAY[@]}"; do - BOT_AC_ROWS="${BOT_AC_ROWS}| AC-${AC_NUM} | Get approval from ${bot} | ${bot} | pending | -" - AC_NUM=$((AC_NUM + 1)) -done - -# Current timestamp for log -STARTED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ) - -# Goal tracker template variables -GOAL_TRACKER_VARS=( - "PR_NUMBER=$PR_NUMBER" - "START_BRANCH=$START_BRANCH" - "ACTIVE_BOTS_DISPLAY=$ACTIVE_BOTS_DISPLAY" - "STARTUP_CASE=$STARTUP_CASE" - "BOT_AC_ROWS=$BOT_AC_ROWS" - "STARTED_AT=$STARTED_AT" -) - -FALLBACK_GOAL_TRACKER="# PR Loop Goal Tracker - -## PR Information - -- **PR Number:** 
#$PR_NUMBER -- **Branch:** $START_BRANCH -- **Monitored Bots:** $ACTIVE_BOTS_DISPLAY -- **Startup Case:** $STARTUP_CASE - -## Ultimate Goal - -Get all monitored bot reviewers ($ACTIVE_BOTS_DISPLAY) to approve this PR. - -## Acceptance Criteria - -| AC | Description | Bot | Status | -|----|-------------|-----|--------| -${BOT_AC_ROWS} -## Current Status - -### Round 0: Initialization - -- **Phase:** Waiting for initial bot reviews -- **Active Bots:** $ACTIVE_BOTS_DISPLAY -- **Approved Bots:** (none yet) - -### Open Issues - -| Round | Bot | Issue | Status | -|-------|-----|-------|--------| -| - | - | (awaiting initial reviews) | pending | - -### Addressed Issues - -| Round | Bot | Issue | Resolution | -|-------|-----|-------|------------| - -## Log - -| Round | Timestamp | Event | -|-------|-----------|-------| -| 0 | $STARTED_AT | PR loop initialized (Case $STARTUP_CASE) | -" - -GOAL_TRACKER_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/goal-tracker-initial.md" "$FALLBACK_GOAL_TRACKER" "${GOAL_TRACKER_VARS[@]}") -echo "$GOAL_TRACKER_CONTENT" > "$GOAL_TRACKER_FILE" - -echo "Goal tracker created: $GOAL_TRACKER_FILE" >&2 - -# ======================================== -# Create Initial Prompt -# ======================================== - -RESOLVE_PATH="$LOOP_DIR/round-0-pr-resolve.md" - -# Detect if comments exist by checking for the "No comments found" sentinel -# fetch-pr-comments.sh outputs "*No comments found.*" only when there are zero comments -if grep -q '^\*No comments found\.\*$' "$COMMENT_FILE" 2>/dev/null; then - COMMENT_COUNT=0 -else - COMMENT_COUNT=1 # Non-zero indicates comments exist -fi - -# Template variables for rendering -TEMPLATE_VARS=( - "PR_NUMBER=$PR_NUMBER" - "START_BRANCH=$START_BRANCH" - "ACTIVE_BOTS_DISPLAY=$ACTIVE_BOTS_DISPLAY" - "RESOLVE_PATH=$RESOLVE_PATH" - "BOT_MENTION_STRING=$BOT_MENTION_STRING" -) - -# Fallback header (used if template fails to load) -FALLBACK_HEADER="Read and execute below with ultrathink - -## PR Review 
Loop (Round 0) - -You are in a PR review loop monitoring feedback from remote review bots. - -**PR Information:** -- PR Number: #{{PR_NUMBER}} -- Branch: {{START_BRANCH}} -- Active Bots: {{ACTIVE_BOTS_DISPLAY}} - -## Review Comments - -The following comments have been fetched from the PR: -" - -# Load and render header template -HEADER_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-header.md" "$FALLBACK_HEADER" "${TEMPLATE_VARS[@]}") - -# Write header to prompt file -echo "$HEADER_CONTENT" > "$LOOP_DIR/round-0-prompt.md" - -# Append the fetched comments -cat "$COMMENT_FILE" >> "$LOOP_DIR/round-0-prompt.md" - -# Select task template based on whether there are comments -if [[ "$COMMENT_COUNT" -eq 0 ]]; then - # No comments yet - this is a fresh PR, bots will review automatically - FALLBACK_TASK=" ---- - -## Your Task - -This PR has no review comments yet. The monitored bots ({{ACTIVE_BOTS_DISPLAY}}) will automatically review the PR - you do NOT need to comment to trigger the first review. - -1. **Wait for automatic bot reviews**: - - Simply write your summary and try to exit - - The Stop Hook will poll for the first bot reviews - -2. **Write your initial summary** to: @{{RESOLVE_PATH}} - - Note that this is Round 0 awaiting initial bot reviews - - No issues to address yet - ---- - -## Important Rules - -1. **Do not comment to trigger review**: First reviews are automatic -2. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -3. **Trust the process**: The Stop Hook manages polling and Codex validation - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues are found, you will receive feedback and continue -4. 
If all bots approve, the loop ends -" - TASK_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-task-no-comments.md" "$FALLBACK_TASK" "${TEMPLATE_VARS[@]}") -else - # Has comments - normal flow with issues to address - FALLBACK_TASK=" ---- - -## Your Task - -1. **Analyze the comments above**, prioritizing: - - Human comments first (they take precedence) - - Bot comments (newest first) - -2. **Fix any issues** identified by the reviewers: - - Read the relevant code files - - Make necessary changes - - Create appropriate tests if needed - -3. **After fixing issues**: - - Commit your changes with a descriptive message - - Push to the remote repository - - Comment on the PR to trigger re-review: - \`\`\`bash - gh pr comment {{PR_NUMBER}} --body \"{{BOT_MENTION_STRING}} please review the latest changes\" - \`\`\` - -4. **Write your resolution summary** to: @{{RESOLVE_PATH}} - - List what issues were addressed - - Files modified - - Tests added (if any) - ---- - -## Important Rules - -1. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -2. **Always push changes**: Your fixes must be pushed for bots to review them -3. **Use the correct comment format**: Tag the bots to trigger their reviews -4. **Be thorough**: Address all valid concerns from the reviewers - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for new bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues remain, you will receive feedback and continue -4. 
If all bots approve, the loop ends -" - TASK_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-task-has-comments.md" "$FALLBACK_TASK" "${TEMPLATE_VARS[@]}") -fi - -# Append task section to prompt file -echo "$TASK_CONTENT" >> "$LOOP_DIR/round-0-prompt.md" - -# ======================================== -# Output Setup Message -# ======================================== - -# All important work is done. If output fails due to SIGPIPE (pipe closed), exit cleanly. -trap 'exit 0' PIPE - -cat << EOF -=== start-pr-loop activated === - -PR Number: #$PR_NUMBER -Branch: $START_BRANCH -Active Bots: $ACTIVE_BOTS_DISPLAY -Comments Fetched: $COMMENT_COUNT -Max Iterations: $MAX_ITERATIONS -Codex Model: $CODEX_MODEL -Codex Effort: $CODEX_EFFORT -Codex Timeout: ${CODEX_TIMEOUT}s -Poll Interval: ${POLL_INTERVAL}s -Poll Timeout: ${POLL_TIMEOUT}s (per bot) -Loop Directory: $LOOP_DIR - -The PR loop is now active. When you try to exit: -1. Stop Hook polls for new bot reviews (every 30s) -2. When reviews arrive, local Codex validates them -3. If issues remain, you'll receive feedback and continue -4. If all bots approve, the loop ends - -To cancel: /humanize:cancel-pr-loop - ---- - -EOF - -# Output the initial prompt -cat "$LOOP_DIR/round-0-prompt.md" - -# Output critical requirements based on whether there are comments -echo "" -if [[ "$COMMENT_COUNT" -eq 0 ]]; then - FALLBACK_CRITICAL=" -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should note: - - This is Round 0 awaiting initial bot reviews - - No issues to address yet - -2. Try to exit - the Stop Hook will poll for bot reviews - -DO NOT comment on the PR to trigger review - the bots will -review automatically since this is a new PR. - -The Stop Hook will poll for bot reviews. 
-===========================================" - CRITICAL_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/critical-requirements-no-comments.md" "$FALLBACK_CRITICAL" "${TEMPLATE_VARS[@]}") -else - FALLBACK_CRITICAL=" -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. COMMIT and PUSH your changes: - - Create a commit with descriptive message - - Push to the remote repository - -2. Comment on the PR to trigger re-review: - gh pr comment {{PR_NUMBER}} --body \"{{BOT_MENTION_STRING}} please review\" - -3. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should include: - - Issues addressed - - Files modified - - Tests added (if any) - -The Stop Hook will then poll for bot reviews. -===========================================" - CRITICAL_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/critical-requirements-has-comments.md" "$FALLBACK_CRITICAL" "${TEMPLATE_VARS[@]}") -fi -echo "$CRITICAL_CONTENT" - -# Explicit exit 0 to ensure clean exit code even if final output fails -exit 0 diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 209bb4ab..15326bc4 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Setup script for start-rlcr-loop # @@ -48,9 +48,50 @@ BASE_BRANCH="" FULL_REVIEW_ROUND="$DEFAULT_FULL_REVIEW_ROUND" SKIP_IMPL="false" SKIP_IMPL_NO_PLAN="false" +SKIP_IMPL_PLAN_ANCHORED="false" ASK_CODEX_QUESTION="true" AGENT_TEAMS="${DEFAULT_AGENT_TEAMS:-false}" BITLESSON_ALLOW_EMPTY_NONE="true" +PRIVACY_MODE="false" + +# Print goal text from the plan file given as $1: up to 10 lines that follow a +# "## Goal", "## Objective" or "## Purpose" heading; when that yields nothing, +# print the first non-heading paragraph instead (at most 5 lines). +extract_plan_goal_content() { + local plan_path="$1" + local goal_section="" + + goal_section=$({ sed -n -E '/^##[[:space:]]*[Gg]oal|^##[[:space:]]*[Oo]bjective|^##[[:space:]]*[Pp]urpose/,/^##/p' "$plan_path" 2>/dev/null || true; } | head -20 | tail -n +2 | head -10) + if [[ -n "$goal_section" ]]; then + 
printf '%s\n' "$goal_section" + return + fi + + awk ' + /^[[:space:]]*#/ { next } + /^[[:space:]]*$/ { + if (started) { + exit + } + next + } + { + print + started=1 + lines++ + if (lines >= 5) { + exit + } + } + ' "$plan_path" +} + +# Print up to 25 lines that follow a "## Acceptance", "## Criteria" or +# "## Requirements" heading in the plan file given as $1 (nothing if no match). +extract_plan_ac_content() { + local plan_path="$1" + { sed -n -E '/^##[[:space:]]*[Aa]cceptance|^##[[:space:]]*[Cc]riteria|^##[[:space:]]*[Rr]equirements/,/^##/p' "$plan_path" 2>/dev/null || true; } | head -30 | tail -n +2 | head -25 +} show_help() { cat <<HELP_EOF @@ -100,6 +136,7 @@ OPTIONS: Allow BitLesson delta with action:none even with no new entries (default) --require-bitlesson-entry-for-none Require at least one BitLesson entry when action is none + --privacy Disable methodology analysis at loop exit (default: analysis enabled) -h, --help Show this help message DESCRIPTION: @@ -260,6 +297,10 @@ while [[ $# -gt 0 ]]; do BITLESSON_ALLOW_EMPTY_NONE="false" shift ;; + --privacy) + PRIVACY_MODE="true" + shift + ;; -*) echo "Unknown option: $1" >&2 echo "Use --help for usage information" >&2 @@ -282,9 +323,13 @@ done # Validate Prerequisites # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine humanize project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 1 +} -# loop-common.sh already sourced above (provides find_active_loop, find_active_pr_loop, etc.) +# loop-common.sh already sourced above (provides find_active_loop, etc.)
# ======================================== # Required Dependency Check @@ -321,10 +366,8 @@ fi # Mutual Exclusion Check # ======================================== -# Check for existing active loops (both RLCR and PR loops) -# Only one loop type can be active at a time +# Check for existing active RLCR loop RLCR_LOOP_DIR=$(find_active_loop "$PROJECT_ROOT/.humanize/rlcr" 2>/dev/null || echo "") -PR_LOOP_DIR=$(find_active_pr_loop "$PROJECT_ROOT/.humanize/pr-loop" 2>/dev/null || echo "") if [[ -n "$RLCR_LOOP_DIR" ]]; then echo "Error: An RLCR loop is already active" >&2 @@ -335,15 +378,6 @@ if [[ -n "$RLCR_LOOP_DIR" ]]; then exit 1 fi -if [[ -n "$PR_LOOP_DIR" ]]; then - echo "Error: A PR loop is already active" >&2 - echo " Active loop: $PR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the PR loop first with: /humanize:cancel-pr-loop" >&2 - exit 1 -fi - # ======================================== # Agent Teams Validation # ======================================== @@ -641,6 +675,10 @@ else LINE_COUNT=0 fi # End of skip-impl plan file content validation skip +if [[ "$SKIP_IMPL" == "true" ]] && [[ "$SKIP_IMPL_NO_PLAN" != "true" ]]; then + SKIP_IMPL_PLAN_ANCHORED="true" +fi + # ======================================== # Record Branch # ======================================== @@ -857,9 +895,13 @@ review_started: $INITIAL_REVIEW_STARTED ask_codex_question: $ASK_CODEX_QUESTION session_id: agent_teams: $AGENT_TEAMS +privacy_mode: $PRIVACY_MODE bitlesson_required: $BITLESSON_STATE_VALUE bitlesson_file: $BITLESSON_FILE_REL bitlesson_allow_empty_none: $BITLESSON_ALLOW_EMPTY_NONE +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal started_at: $(date -u +%Y-%m-%dT%H:%M:%SZ) --- EOF @@ -887,27 +929,132 @@ fi GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" if [[ "$SKIP_IMPL" == "true" ]]; then - # Create simplified goal tracker for skip-impl mode (no placeholder text) - cat > "$GOAL_TRACKER_FILE" << 
'GOAL_TRACKER_EOF' + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + PLAN_GOAL_CONTENT=$(extract_plan_goal_content "$FULL_PLAN_PATH") + PLAN_AC_CONTENT=$(extract_plan_ac_content "$FULL_PLAN_PATH") + + if [[ -z "$PLAN_GOAL_CONTENT" ]]; then + PLAN_GOAL_CONTENT="Preserve the original plan scope from $PLAN_FILE while resolving code review findings on the current branch." + fi + + if [[ -z "$PLAN_AC_CONTENT" ]]; then + PLAN_AC_CONTENT=$(cat <<EOF +- The current branch remains aligned with the original plan at $PLAN_FILE. +- All blocking \`[P0-9]\` code review findings are resolved without widening scope beyond the original plan. +- Non-blocking follow-up items are explicitly queued and do not block completion. +EOF +) + fi + + cat > "$GOAL_TRACKER_FILE" << EOF +# Goal Tracker (Skip Implementation Mode with Plan Anchor) + +This RLCR loop was started with \`--skip-impl\` flag. The implementation phase was skipped, +but an explicit plan was provided and remains the scope anchor for review-only work. + +This tracker is still used to keep the review loop aligned around one mainline objective +and to separate blocking issues from queued follow-up work. 
+ +## IMMUTABLE SECTION + +### Ultimate Goal + +$PLAN_GOAL_CONTENT + +### Acceptance Criteria + +$PLAN_AC_CONTENT + +--- + +## MUTABLE SECTION + +### Plan Version: Review-Only (Updated: Round 0) + +#### Plan Evolution Log +| Round | Change | Reason | Impact on AC | +|-------|--------|--------|--------------| +| 0 | Skip implementation mode initialized around explicit plan anchor | Loop started with \`--skip-impl\` and retained @$PLAN_FILE as scope anchor | Review stays aligned with original plan | + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Preserve original plan alignment while resolving blocking review findings | Plan ACs in scope | pending | Review-only mode with explicit plan anchor | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| + +### Completed and Verified +| AC | Task | Completed Round | Verified Round | Evidence | +|----|------|-----------------|----------------|----------| + +### Explicitly Deferred +| Task | Original AC | Deferred Since | Justification | When to Reconsider | +|------|-------------|----------------|---------------|-------------------| + +EOF + else + # Create review-only goal tracker for skip-impl mode without a plan (no placeholder text) + cat > "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' # Goal Tracker (Skip Implementation Mode) This RLCR loop was started with `--skip-impl` flag. The implementation phase was skipped, and the loop is running in code review mode only. -## Mode: Code Review Only +This tracker is still used to keep the review loop aligned around one mainline objective +and to separate blocking issues from queued follow-up work. 
+ +## IMMUTABLE SECTION + +### Ultimate Goal + +Pass code review for the current branch without regressing existing behavior. -The goal tracker is not used in skip-impl mode because: -- There is no implementation plan to track -- The loop focuses solely on code review quality -- No acceptance criteria tracking is needed +### Acceptance Criteria -## What This Loop Does +- AC-1: All blocking `[P0-9]` code review findings are resolved. +- AC-2: Non-blocking follow-up items are explicitly queued and do not block completion. +- AC-3: Finalize phase can complete without introducing new review regressions. -1. Runs `codex review` on changes between base branch and current branch -2. If issues are found, Claude fixes them iteratively -3. When no issues remain, enters finalize phase for code simplification +--- + +## MUTABLE SECTION + +### Plan Version: Review-Only (Updated: Round 0) + +#### Plan Evolution Log +| Round | Change | Reason | Impact on AC | +|-------|--------|--------|--------------| +| 0 | Skip implementation mode initialized | Loop started with `--skip-impl` | Focus on review-only objective | + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Pass code review for current branch | AC-1 | pending | Review-only mode | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| + +### Completed and Verified +| AC | Task | Completed Round | Verified Round | Evidence | +|----|------|-----------------|----------------|----------| + +### Explicitly Deferred +| Task | Original AC | Deferred Since | Justification | When to Reconsider | +|------|-------------|----------------|---------------|-------------------| GOAL_TRACKER_EOF + fi else # Normal mode: create 
full goal tracker @@ -935,11 +1082,8 @@ GOAL_TRACKER_EOF # Extract goal from plan file (look for ## Goal, ## Objective, or first paragraph) # This is a heuristic - Claude will refine it in round 0 # Use ^## without leading whitespace - markdown headers should start at column 0 -GOAL_LINE=$(grep -i -m1 '^##[[:space:]]*\(goal\|objective\|purpose\)' "$FULL_PLAN_PATH" 2>/dev/null || echo "") -if [[ -n "$GOAL_LINE" ]]; then - # Get the content after the heading - # Use || true after sed to ignore SIGPIPE when head closes the pipe early (pipefail mode) - GOAL_SECTION=$({ sed -n '/^##[[:space:]]*[Gg]oal\|^##[[:space:]]*[Oo]bjective\|^##[[:space:]]*[Pp]urpose/,/^##/p' "$FULL_PLAN_PATH" || true; } | head -20 | tail -n +2 | head -10) +GOAL_SECTION=$(extract_plan_goal_content "$FULL_PLAN_PATH") +if [[ -n "$GOAL_SECTION" ]]; then echo "$GOAL_SECTION" >> "$GOAL_TRACKER_FILE" else # Use first non-empty, non-heading paragraph as goal description @@ -959,7 +1103,7 @@ GOAL_TRACKER_EOF # Extract acceptance criteria from plan file (look for ## Acceptance, ## Criteria, ## Requirements) # Use ^## without leading whitespace - markdown headers should start at column 0 # Use || true after sed to ignore SIGPIPE when head closes the pipe early (pipefail mode) -AC_SECTION=$({ sed -n '/^##[[:space:]]*[Aa]cceptance\|^##[[:space:]]*[Cc]riteria\|^##[[:space:]]*[Rr]equirements/,/^##/p' "$FULL_PLAN_PATH" 2>/dev/null || true; } | head -30 | tail -n +2 | head -25) +AC_SECTION=$(extract_plan_ac_content "$FULL_PLAN_PATH") if [[ -n "$AC_SECTION" ]]; then echo "$AC_SECTION" >> "$GOAL_TRACKER_FILE" else @@ -982,10 +1126,20 @@ cat >> "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' | 0 | Initial plan | - | - | #### Active Tasks -<!-- Map each task to its target Acceptance Criterion and routing tag --> +<!-- Mainline tasks only: each task must directly advance the current round objective and carry routing metadata --> | Task | Target AC | Status | Tag | Owner | Notes | 
|------|-----------|--------|-----|-------|-------| -| [To be populated by Claude based on plan] | - | pending | coding or analyze | claude or codex | - | +| [To be populated by Claude based on plan] | - | pending | coding or analyze | claude or codex | mainline task only | + +### Blocking Side Issues +<!-- Only issues that directly block current mainline progress belong here --> +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +<!-- Non-blocking issues stay queued and must NOT replace the round objective --> +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| ### Completed and Verified <!-- Only move tasks here after Codex verification --> @@ -997,10 +1151,6 @@ cat >> "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' | Task | Original AC | Deferred Since | Justification | When to Reconsider | |------|-------------|----------------|---------------|-------------------| -### Open Issues -<!-- Issues discovered during implementation --> -| Issue | Discovered Round | Blocking AC | Resolution Path | -|-------|-----------------|-------------|-----------------| GOAL_TRACKER_EOF fi # End of skip-impl goal tracker handling @@ -1043,12 +1193,35 @@ SUMMARY_TMPL_EOF # ======================================== SUMMARY_PATH="$LOOP_DIR/round-0-summary.md" +ROUND_CONTRACT_PATH="$LOOP_DIR/round-0-contract.md" # Create the round-0 summary scaffold before either mode starts so stop-hook # validation and BitLesson Delta checks have a valid target file. write_summary_template "$SUMMARY_PATH" if [[ "$SKIP_IMPL" == "true" ]]; then + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + cat > "$ROUND_CONTRACT_PATH" << EOF +# Round 0 Contract + +- Mainline Objective: Keep the current branch aligned with @$PLAN_FILE while resolving only review findings that block clean acceptance. 
+- Target ACs: The original plan acceptance criteria affected by the current branch changes. +- Blocking Side Issues In Scope: Any \`[P0-9]\` findings or regressions that block review acceptance or violate the original plan scope. +- Queued Side Issues Out of Scope: Non-blocking cleanup, follow-up refactors, or future improvements that do not block review acceptance or plan alignment. +- Success Criteria: Code review passes and the current branch still matches the original plan's intended scope. +EOF + else + cat > "$ROUND_CONTRACT_PATH" << 'ROUND_CONTRACT_EOF' +# Round 0 Contract + +- Mainline Objective: Run code review for the current branch and resolve only findings that block clean acceptance. +- Target ACs: AC-1, AC-2 +- Blocking Side Issues In Scope: Any `[P0-9]` findings from the active review cycle. +- Queued Side Issues Out of Scope: Non-blocking cleanup, follow-up refactors, or future improvements that do not block review acceptance. +- Success Criteria: Code review passes with no blocking findings, and any remaining non-blocking follow-up is explicitly queued. +ROUND_CONTRACT_EOF + fi + # Skip-impl mode: create a prompt for code review only cat > "$LOOP_DIR/round-0-prompt.md" << EOF # Skip Implementation Mode - Code Review Loop @@ -1065,6 +1238,11 @@ The loop will automatically run \`codex review\` on your changes when you try to If issues are found (marked with [P0-9] priority), you'll need to fix them before the loop ends. Do not try to execute anything to trigger the review - just stop and it will run automatically. +Before requesting review, read: +- @$PLAN_FILE +- @$GOAL_TRACKER_FILE +- @$ROUND_CONTRACT_PATH + ## Your Task 1. Review your current work @@ -1073,10 +1251,32 @@ Do not try to execute anything to trigger the review - just stop and it will run 4. Repeat until no issues remain 5. 
Enter finalize phase for code simplification -## Note +## Review Objective + +Use the round contract as the current anchor: +- Keep one stable mainline objective and do not let it drift +- Treat review findings as \`[blocking]\` only if they block review acceptance +- Record non-blocking follow-up as \`[queued]\` +- Do not let queued work take over the round + +EOF + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + cat >> "$LOOP_DIR/round-0-prompt.md" << EOF +- Keep review-only work aligned with the original plan at @$PLAN_FILE -Since this is skip-impl mode, there is no implementation plan to follow. -The goal tracker is not used - focus on fixing code review issues. +Implementation phase is skipped, but the original plan still defines the intended branch scope. + +EOF + else + cat >> "$LOOP_DIR/round-0-prompt.md" << 'EOF' +There is no explicit implementation plan for this loop, so the review-only contract is the primary anchor. + +EOF + fi + + cat >> "$LOOP_DIR/round-0-prompt.md" << EOF + +Keep @$ROUND_CONTRACT_PATH updated if the blocking/queued split changes materially during review iterations. When you're ready for review, write a brief summary of your changes and try to exit (do not try to execute anything, just stop). @@ -1097,8 +1297,21 @@ Before starting implementation, you MUST initialize the Goal Tracker: 1. Read @$GOAL_TRACKER_FILE 2. If the "Ultimate Goal" section says "[To be extracted...]", extract a clear goal statement from the plan 3. If the "Acceptance Criteria" section says "[To be defined...]", define 3-7 specific, testable criteria -4. Populate the "Active Tasks" table with tasks from the plan, mapping each to an AC and filling Tag/Owner -5. Write the updated goal-tracker.md +4. Populate the "Active Tasks" table with MAINLINE tasks from the plan, mapping each to an AC and filling Tag/Owner +5. Record any already-known side issues in either "Blocking Side Issues" or "Queued Side Issues" +6. 
Write the updated goal-tracker.md + +## Round Contract Setup (REQUIRED BEFORE CODING) + +Before starting implementation, create @$ROUND_CONTRACT_PATH with: + +1. **One mainline objective** for this round +2. **Target ACs** (1-2 ACs only) +3. **Blocking side issues in scope** for this round +4. **Queued side issues out of scope** for this round +5. **Round success criteria** + +Use this contract to keep the round focused. Do NOT let non-blocking bugs or cleanup work replace the mainline objective. **IMPORTANT**: The IMMUTABLE SECTION can only be modified in Round 0. After this round, it becomes read-only. @@ -1106,8 +1319,18 @@ Before starting implementation, you MUST initialize the Goal Tracker: ## Implementation Plan -For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList) to track each item in order of importance. -You are strictly prohibited from only addressing the most important issues - you MUST create Tasks for ALL discovered issues and attempt to resolve each one. +For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList). + +Every task MUST start with exactly one lane tag: +- \`[mainline]\` for plan-derived work that directly advances the round objective +- \`[blocking]\` for issues that prevent the mainline objective from succeeding safely +- \`[queued]\` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- \`[mainline]\` tasks are the primary success condition for the round +- \`[blocking]\` tasks may be resolved in the round only if they truly block mainline progress +- \`[queued]\` tasks must NOT become the round objective and do NOT need to be cleared before moving on +- If a new issue is not blocking the current objective, tag it \`[queued]\` and keep moving on the mainline ## Task Tag Routing (MUST FOLLOW) @@ -1176,18 +1399,24 @@ cat >> "$LOOP_DIR/round-0-prompt.md" << EOF Throughout your work, you MUST maintain the Goal Tracker: -1. 
**Before starting a task**: Mark it as "in_progress" in Active Tasks +1. **Before starting a round**: Re-anchor on the original plan and current round contract +2. **Before starting a task**: Mark the relevant mainline task as "in_progress" in Active Tasks - Confirm Tag/Owner routing is correct before execution -2. **After completing a task**: Move it to "Completed and Verified" with evidence (but mark as "pending verification") -3. **If you discover the plan has errors**: +3. **Active Tasks** are MAINLINE tasks only - side issues do not belong there +4. **Blocking Side Issues** are reserved for issues that truly stop mainline progress +5. **Queued Side Issues** are non-blocking and must not take over the round +6. **After completing a mainline task**: Move it to "Completed and Verified" with evidence (but mark as "pending verification") +7. **If you discover the plan has errors**: - Do NOT silently change direction - Add entry to "Plan Evolution Log" with justification - Explain how the change still serves the Ultimate Goal -4. **If you need to defer a task**: +8. **If you need to defer a task**: - Move it to "Explicitly Deferred" section - Provide strong justification - Explain impact on Acceptance Criteria -5. **If you discover new issues**: Add to "Open Issues" table +9. **If you discover new issues**: + - Add to "Blocking Side Issues" only if mainline progress is blocked + - Otherwise add to "Queued Side Issues" or keep them as \`[queued]\` tasks/backlog --- @@ -1196,8 +1425,9 @@ Note: You MUST NOT try to exit \`start-rlcr-loop\` loop by lying or edit loop st After completing the work, please: 0. If you have access to the \`code-simplifier\` agent, use it to review and optimize the code you just wrote 1. Finalize @$GOAL_TRACKER_FILE (this is Round 0, so you are initializing it - see "Goal Tracker Setup" above) -2. Commit your changes with a descriptive commit message -3. Write your work summary into @$SUMMARY_PATH +2. 
Write your round contract into @$ROUND_CONTRACT_PATH +3. Commit your changes with a descriptive commit message +4. Write your work summary into @$SUMMARY_PATH EOF # Add push instruction only if push_every_round is true diff --git a/scripts/statusline.sh b/scripts/statusline.sh index b6430052..bc0f5f41 100755 --- a/scripts/statusline.sh +++ b/scripts/statusline.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # DISCLAIMER # ---------- @@ -37,7 +37,9 @@ format_duration() { _resolve_rlcr_display() { local session_dir="$1" - if [[ -f "$session_dir/finalize-state.md" ]]; then + if [[ -f "$session_dir/methodology-analysis-state.md" ]]; then + echo "Analyzing" + elif [[ -f "$session_dir/finalize-state.md" ]]; then echo "Finalizing" elif [[ -f "$session_dir/state.md" ]]; then echo "Active" @@ -95,7 +97,9 @@ get_rlcr_status() { [[ -z "$dir" ]] && continue local trimmed="${dir%/}" local any_state="" - if [[ -f "$trimmed/finalize-state.md" ]]; then + if [[ -f "$trimmed/methodology-analysis-state.md" ]]; then + any_state="$trimmed/methodology-analysis-state.md" + elif [[ -f "$trimmed/finalize-state.md" ]]; then any_state="$trimmed/finalize-state.md" elif [[ -f "$trimmed/state.md" ]]; then any_state="$trimmed/state.md" diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh new file mode 100755 index 00000000..99c4bb1a --- /dev/null +++ b/scripts/validate-gen-idea-io.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +# validate-gen-idea-io.sh +# Validates input, slug, and output paths for the gen-idea command. 
+# Exit codes:
+#   0 - Success
+#   1 - Missing idea input or empty input file
+#   2 - Input looks like a path but is not readable, not .md, or does not exist
+#   3 - Output parent directory does not exist (user-supplied path only)
+#   4 - Output file already exists
+#   5 - No write permission to output directory
+#   6 - Invalid arguments (including --n out of range)
+#   7 - Template file not found (plugin configuration error)
+
+set -e
+
+# Print the command-line synopsis and terminate with exit code 6.
+# Also reached via -h/--help, so help output exits with 6 as well.
+usage() {
+    echo "Usage: $0 <idea-text-or-path> [--n <int>] [--output <path>]"
+    echo ""
+    echo "Arguments:"
+    echo " <idea-text-or-path> Inline idea text OR path to an existing .md file (required)"
+    echo " --n Number of directions (default: 6; range: 2-10)"
+    echo " --output Output draft path (default: .humanize/ideas/<slug>-<timestamp>.md)"
+    echo " -h, --help Show this help message"
+    exit 6
+}
+
+IDEA_INPUT=""
+N=6
+OUTPUT_FILE=""
+
+# Parse arguments: one positional idea (text or path) plus optional flags.
+# A flag value that itself starts with "--" is treated as missing.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --n)
+            if [[ $# -lt 2 || "$2" == --* ]]; then
+                echo "ERROR: --n requires a value"
+                usage
+            fi
+            N="$2"
+            shift 2
+            ;;
+        --output)
+            if [[ $# -lt 2 || "$2" == --* ]]; then
+                echo "ERROR: --output requires a value"
+                usage
+            fi
+            OUTPUT_FILE="$2"
+            shift 2
+            ;;
+        -h|--help)
+            usage
+            ;;
+        --*)
+            echo "ERROR: Unknown option: $1"
+            usage
+            ;;
+        *)
+            if [[ -z "$IDEA_INPUT" ]]; then
+                IDEA_INPUT="$1"
+                shift
+            else
+                echo "ERROR: Unexpected positional argument: $1"
+                usage
+            fi
+            ;;
+    esac
+done
+
+if [[ -z "$IDEA_INPUT" ]]; then
+    echo "VALIDATION_ERROR: MISSING_IDEA"
+    echo "No idea provided. Pass inline text or a .md file path as the first argument."
+    exit 1
+fi
+
+# --n must be a plain decimal integer in the range 2-10.
+if ! [[ "$N" =~ ^[0-9]+$ ]]; then
+    echo "VALIDATION_ERROR: INVALID_N"
+    echo "--n must be a non-negative integer; got: $N"
+    exit 6
+fi
+# Drop leading zeros before comparing. Bash arithmetic treats a leading 0 as
+# octal: "08" is an arithmetic error (which made the range test false, so the
+# value was accepted) and "012" evaluates to 10. All zeros collapse to "0".
+N="${N#"${N%%[!0]*}"}"
+N="${N:-0}"
+# More than two digits is always out of range; testing the length first also
+# keeps very long digit strings away from integer wrap-around inside (( )).
+if (( ${#N} > 2 || N < 2 || N > 10 )); then
+    echo "VALIDATION_ERROR: N_OUT_OF_RANGE"
+    echo "--n must be between 2 and 10 inclusive; got: $N"
+    exit 6
+fi
+
+INPUT_MODE=""
+IDEA_BODY_FILE=""
+SLUG=""
+
+# Detect whether IDEA_INPUT is meant as a file path. The `-f` test below is
+# the primary gate; this heuristic only matters when that test fails and we
+# must decide whether to emit INPUT_NOT_FOUND (user meant a path) or treat
+# the text as inline. Any whitespace disqualifies the input from path mode,
+# so inline ideas that happen to mention a filename like "rename README.md"
+# or that contain "/" fall through to inline. Limitation: a real path that
+# contains whitespace and does not exist is silently treated as inline.
+looks_like_path=false
+if [[ "$IDEA_INPUT" != *[[:space:]]* ]]; then
+    if [[ "$IDEA_INPUT" == *.md || "$IDEA_INPUT" == */* ]]; then
+        looks_like_path=true
+    fi
+fi
+
+if [[ -f "$IDEA_INPUT" ]]; then
+    # File mode: the input names an existing regular file.
+    if [[ "$IDEA_INPUT" != *.md ]]; then
+        echo "VALIDATION_ERROR: INPUT_NOT_MD"
+        echo "File input must have .md extension; got: $IDEA_INPUT"
+        exit 2
+    fi
+    if [[ ! -r "$IDEA_INPUT" ]]; then
+        echo "VALIDATION_ERROR: INPUT_NOT_READABLE"
+        echo "Input file is not readable: $IDEA_INPUT"
+        exit 2
+    fi
+    if [[ ! -s "$IDEA_INPUT" ]]; then
+        echo "VALIDATION_ERROR: INPUT_EMPTY"
+        echo "Input file is empty: $IDEA_INPUT"
+        exit 1
+    fi
+    INPUT_MODE="file"
+    # Fall back to the path as given when realpath is unavailable or fails.
+    IDEA_BODY_FILE="$(realpath "$IDEA_INPUT" 2>/dev/null || echo "$IDEA_INPUT")"
+    base="$(basename "$IDEA_INPUT")"
+    SLUG="${base%.md}"
+elif [[ "$looks_like_path" == true ]]; then
+    echo "VALIDATION_ERROR: INPUT_NOT_FOUND"
+    echo "Looks like a file path but does not exist: $IDEA_INPUT"
+    exit 2
+else
+    # Inline mode emits the idea body on stdout inside a sentinel block,
+    # so the caller does not need to consume an on-disk tempfile. This
+    # avoids leaking user-provided text under $TMPDIR on repeated runs.
+    INPUT_MODE="inline"
+    if (( ${#IDEA_INPUT} < 10 )); then
+        echo "WARNING: short idea (${#IDEA_INPUT} chars); proceeding"
+    fi
+    # Build a filesystem-safe slug from the first 40 bytes of the idea.
+    # LC_ALL=C keeps the pipeline byte-oriented, so a multibyte character cut
+    # by head -c or locale-specific a-z collation cannot leak odd bytes into
+    # the slug or make the tools abort. tr -cs turns every run of bytes
+    # outside a-z0-9 (newlines included) into a single "-".
+    slug_raw="$(printf '%s' "$IDEA_INPUT" | LC_ALL=C head -c 40 | LC_ALL=C tr '[:upper:]' '[:lower:]' | LC_ALL=C tr -cs 'a-z0-9' '-')"
+    # Runs are already squeezed, so at most one "-" remains at either end.
+    slug_raw="${slug_raw#-}"
+    slug_raw="${slug_raw%-}"
+    if [[ -z "$slug_raw" ]]; then
+        slug_raw="idea"
+    fi
+    SLUG="$slug_raw"
+fi
+
+# Default output lives under the git top-level directory, or the current
+# directory when not inside a git work tree.
+PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
+
+DEFAULT_OUTPUT=false
+if [[ -z "$OUTPUT_FILE" ]]; then
+    TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
+    OUTPUT_FILE="$PROJECT_ROOT/.humanize/ideas/${SLUG}-${TIMESTAMP}.md"
+    DEFAULT_OUTPUT=true
+fi
+
+# realpath -m resolves a path that does not exist yet; keep the raw path when
+# the option is not supported.
+OUTPUT_FILE="$(realpath -m "$OUTPUT_FILE" 2>/dev/null || echo "$OUTPUT_FILE")"
+OUTPUT_DIR="$(dirname "$OUTPUT_FILE")"
+
+# Only the default location is created on demand; a user-supplied parent
+# directory must already exist (exit code 3 below).
+if [[ "$DEFAULT_OUTPUT" == true ]]; then
+    mkdir -p "$OUTPUT_DIR" 2>/dev/null || true
+fi
+
+if [[ ! -d "$OUTPUT_DIR" ]]; then
+    echo "VALIDATION_ERROR: OUTPUT_DIR_NOT_FOUND"
+    echo "Output directory does not exist: $OUTPUT_DIR"
+    exit 3
+fi
+
+if [[ -e "$OUTPUT_FILE" ]]; then
+    echo "VALIDATION_ERROR: OUTPUT_EXISTS"
+    echo "Output already exists: $OUTPUT_FILE"
+    exit 4
+fi
+
+if [[ ! -w "$OUTPUT_DIR" ]]; then
+    echo "VALIDATION_ERROR: NO_WRITE_PERMISSION"
+    echo "No write permission: $OUTPUT_DIR"
+    exit 5
+fi
+
+# Locate the prompt template: prefer CLAUDE_PLUGIN_ROOT, otherwise resolve it
+# relative to this script.
+if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then
+    TEMPLATE_FILE="$CLAUDE_PLUGIN_ROOT/prompt-template/idea/gen-idea-template.md"
+else
+    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+    TEMPLATE_FILE="$SCRIPT_DIR/../prompt-template/idea/gen-idea-template.md"
+fi
+
+if [[ !
-f "$TEMPLATE_FILE" ]]; then + echo "VALIDATION_ERROR: TEMPLATE_NOT_FOUND" + echo "Template file missing: $TEMPLATE_FILE" + exit 7 +fi + +echo "VALIDATION_SUCCESS" +echo "INPUT_MODE: $INPUT_MODE" +if [[ "$INPUT_MODE" == "file" ]]; then + echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" +fi +echo "OUTPUT_FILE: $OUTPUT_FILE" +echo "SLUG: $SLUG" +echo "TEMPLATE_FILE: $TEMPLATE_FILE" +echo "N: $N" +if [[ "$INPUT_MODE" == "inline" ]]; then + echo "=== IDEA_BODY_BEGIN ===" + printf '%s\n' "$IDEA_INPUT" + echo "=== IDEA_BODY_END ===" +fi +exit 0 diff --git a/scripts/validate-gen-plan-io.sh b/scripts/validate-gen-plan-io.sh index cc3fd18b..90eac475 100755 --- a/scripts/validate-gen-plan-io.sh +++ b/scripts/validate-gen-plan-io.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-gen-plan-io.sh # Validates input and output paths for the gen-plan command # Exit codes: diff --git a/scripts/validate-refine-plan-io.sh b/scripts/validate-refine-plan-io.sh index f0ca70c0..111281a0 100755 --- a/scripts/validate-refine-plan-io.sh +++ b/scripts/validate-refine-plan-io.sh @@ -1,11 +1,11 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-refine-plan-io.sh # Validates input and output paths for the refine-plan command # Exit codes: # 0 - Success, all validations passed # 1 - Input file does not exist # 2 - Input file is empty -# 3 - Input file has no valid CMT:/ENDCMT blocks or has malformed CMT syntax +# 3 - Input file has no valid comment blocks or has malformed comment syntax # 4 - Input file missing required gen-plan sections # 5 - Output directory does not exist or is not writable, or input directory is not writable for in-place mode # 6 - QA directory not writable @@ -51,14 +51,67 @@ scan_cmt_blocks() { heading = current_heading() if (kind == "nested") { - printf "Comment parse error: nested CMT block at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" + printf "Comment parse error: nested comment block at line %d, column %d 
near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" } else if (kind == "stray_end") { - printf "Comment parse error: stray ENDCMT at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" + printf "Comment parse error: stray comment end marker at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" } exit fatal_code }
+    # Locate the earliest comment-related marker in `text` at or after the
+    # 1-based offset `start_pos`.
+    # Returns "<marker>|<absolute 1-based position>", or "" when none occurs.
+    # Everything after start_pos in the parameter list is an awk-style local;
+    # callers pass only (text, start_pos).
+    function find_comment_markers(text, start_pos, markers, i, pos, min_pos, closest_marker, closest_pos,    n, tail) {
+        # Fixed scan order. No marker is a prefix of another, so two markers
+        # can never match at the same position and order cannot change the result.
+        n = split("CMT: <cmt> <comment> ENDCMT </cmt> </comment> <!-- -->", markers, " ")
+        # The remaining text is the same for every marker, so slice it once.
+        tail = substr(text, start_pos)
+
+        closest_marker = ""
+        closest_pos = 0
+        min_pos = length(text) + 1
+
+        for (i = 1; i <= n; i++) {
+            pos = index(tail, markers[i])
+            if (pos > 0) {
+                # Convert the offset within tail back to a position in text.
+                pos = start_pos + pos - 1
+                if (pos < min_pos) {
+                    min_pos = pos
+                    closest_marker = markers[i]
+                    closest_pos = pos
+                }
+            }
+        }
+
+        if (closest_marker == "") {
+            return ""
+        }
+        return closest_marker "|" closest_pos
+    }
+
+    # Map a block format name ("classic", "cmt_tag", "comment_tag") to the
+    # closing marker expected for it; returns "" for any other value.
+    function get_end_marker_for_format(format) {
+        if (format == "classic") return "ENDCMT"
+        if (format == "cmt_tag") return "</cmt>"
+        if (format == "comment_tag") return "</comment>"
+        return ""
+    }
+
+    # Return the character length of a recognized marker, or 0 for any other
+    # string. Derived from the marker text so the number cannot drift from it.
+    function get_marker_length(marker) {
+        if (marker == "CMT:" || marker == "<cmt>" || marker == "<comment>" ||
+            marker == "ENDCMT" || marker == "</cmt>" || marker == "</comment>" ||
+            marker == "<!--" || marker == "-->") {
+            return length(marker)
+        }
+        return 0
+    }
+
 BEGIN { count = 0 in_fence = 0 @@ -71,6 +124,7 @@ scan_cmt_blocks() { cmt_open_heading = "Preamble" cmt_open_excerpt = "" cmt_has_text = 0 + cmt_format = "" # Track format:
"classic", "cmt_tag", "comment_tag" fatal = 0 fatal_code = 0 } @@ -126,26 +180,8 @@ scan_cmt_blocks() { } if (in_cmt) { - html_rel = index(rest, "<!--") - end_rel = index(rest, "ENDCMT") - nested_rel = index(rest, "CMT:") - token_rel = 0 - token_type = "" - - if (html_rel > 0) { - token_rel = html_rel - token_type = "html" - } - if (end_rel > 0 && (token_rel == 0 || end_rel < token_rel)) { - token_rel = end_rel - token_type = "end" - } - if (nested_rel > 0 && (token_rel == 0 || nested_rel < token_rel)) { - token_rel = nested_rel - token_type = "nested" - } - - if (token_rel == 0) { + marker_info = find_comment_markers(line, pos) + if (marker_info == "") { if (has_non_ws(rest)) { cmt_has_text = 1 } @@ -153,77 +189,114 @@ scan_cmt_blocks() { break } + split(marker_info, parts, "|") + found_marker = parts[1] + marker_pos = parts[2] + token_rel = marker_pos - pos + 1 + segment = substr(rest, 1, token_rel - 1) if (has_non_ws(segment)) { cmt_has_text = 1 } - if (token_type == "html") { + if (found_marker == "<!--") { cmt_has_text = 1 in_html = 1 pos += token_rel + 3 continue } - if (token_type == "nested") { - emit_error("nested", NR, pos + token_rel - 1, context_excerpt(line, pos + token_rel - 1)) - } + expected_end = get_end_marker_for_format(cmt_format) + if (found_marker == expected_end) { + # Found correct end marker + if (cmt_has_text) { + count++ + } - if (cmt_has_text) { - count++ + in_cmt = 0 + cmt_has_text = 0 + cmt_open_line = 0 + cmt_open_col = 0 + cmt_open_heading = "Preamble" + cmt_open_excerpt = "" + cmt_format = "" + pos += token_rel + get_marker_length(found_marker) - 1 + continue } - in_cmt = 0 - cmt_has_text = 0 - cmt_open_line = 0 - cmt_open_col = 0 - cmt_open_heading = "Preamble" - cmt_open_excerpt = "" - pos += token_rel + 5 - continue - } + # Check for nested comment start + if (found_marker == "CMT:" || found_marker == "<cmt>" || found_marker == "<comment>") { + emit_error("nested", NR, marker_pos, context_excerpt(line, marker_pos)) + } - html_rel 
= index(rest, "<!--") - cmt_rel = index(rest, "CMT:") - end_rel = index(rest, "ENDCMT") - token_rel = 0 - token_type = "" + # Check for wrong end marker + if (found_marker == "ENDCMT" || found_marker == "</cmt>" || found_marker == "</comment>") { + emit_error("stray_end", NR, marker_pos, context_excerpt(line, marker_pos)) + } - if (html_rel > 0) { - token_rel = html_rel - token_type = "html" - } - if (cmt_rel > 0 && (token_rel == 0 || cmt_rel < token_rel)) { - token_rel = cmt_rel - token_type = "cmt" - } - if (end_rel > 0 && (token_rel == 0 || end_rel < token_rel)) { - token_rel = end_rel - token_type = "stray_end" + pos = marker_pos + 1 + continue } - if (token_rel == 0) { + # Not in comment block + marker_info = find_comment_markers(line, pos) + if (marker_info == "") { break } - if (token_type == "html") { + split(marker_info, parts, "|") + found_marker = parts[1] + marker_pos = parts[2] + token_rel = marker_pos - pos + 1 + + if (found_marker == "<!--") { in_html = 1 pos += token_rel + 3 continue } - if (token_type == "cmt") { + if (found_marker == "CMT:") { in_cmt = 1 + cmt_format = "classic" cmt_has_text = 0 cmt_open_line = NR - cmt_open_col = pos + token_rel - 1 + cmt_open_col = marker_pos cmt_open_heading = current_heading() - cmt_open_excerpt = context_excerpt(line, cmt_open_col) + cmt_open_excerpt = context_excerpt(line, marker_pos) pos += token_rel + 3 continue } - emit_error("stray_end", NR, pos + token_rel - 1, context_excerpt(line, pos + token_rel - 1)) + if (found_marker == "<cmt>") { + in_cmt = 1 + cmt_format = "cmt_tag" + cmt_has_text = 0 + cmt_open_line = NR + cmt_open_col = marker_pos + cmt_open_heading = current_heading() + cmt_open_excerpt = context_excerpt(line, marker_pos) + pos += token_rel + 4 + continue + } + + if (found_marker == "<comment>") { + in_cmt = 1 + cmt_format = "comment_tag" + cmt_has_text = 0 + cmt_open_line = NR + cmt_open_col = marker_pos + cmt_open_heading = current_heading() + cmt_open_excerpt = context_excerpt(line, 
marker_pos) + pos += token_rel + 8 + continue + } + + # Stray end markers + if (found_marker == "ENDCMT" || found_marker == "</cmt>" || found_marker == "</comment>") { + emit_error("stray_end", NR, marker_pos, context_excerpt(line, marker_pos)) + } + + pos = marker_pos + 1 } } @@ -233,7 +306,7 @@ scan_cmt_blocks() { } if (in_cmt) { - printf "Comment parse error: missing ENDCMT for block opened at line %d, column %d near \"%s\" (context: \"%s\")\n", cmt_open_line, cmt_open_col, cmt_open_heading, cmt_open_excerpt > "/dev/stderr" + printf "Comment parse error: missing end marker for block opened at line %d, column %d near \"%s\" (context: \"%s\")\n", cmt_open_line, cmt_open_col, cmt_open_heading, cmt_open_excerpt > "/dev/stderr" exit 2 } @@ -304,7 +377,9 @@ scan_sections() { if (in_cmt) { html_rel = index(rest, "<!--") - end_rel = index(rest, "ENDCMT") + classic_end_rel = index(rest, "ENDCMT") + cmt_tag_end_rel = index(rest, "</cmt>") + comment_tag_end_rel = index(rest, "</comment>") token_rel = 0 token_type = "" @@ -312,9 +387,17 @@ scan_sections() { token_rel = html_rel token_type = "html" } - if (end_rel > 0 && (token_rel == 0 || end_rel < token_rel)) { - token_rel = end_rel - token_type = "end" + if (classic_end_rel > 0 && (token_rel == 0 || classic_end_rel < token_rel)) { + token_rel = classic_end_rel + token_type = "classic_end" + } + if (cmt_tag_end_rel > 0 && (token_rel == 0 || cmt_tag_end_rel < token_rel)) { + token_rel = cmt_tag_end_rel + token_type = "cmt_tag_end" + } + if (comment_tag_end_rel > 0 && (token_rel == 0 || comment_tag_end_rel < token_rel)) { + token_rel = comment_tag_end_rel + token_type = "comment_tag_end" } if (token_rel == 0) { @@ -329,12 +412,20 @@ scan_sections() { } in_cmt = 0 - pos += token_rel + 5 + if (token_type == "classic_end") { + pos += token_rel + 5 + } else if (token_type == "cmt_tag_end") { + pos += token_rel + 5 + } else if (token_type == "comment_tag_end") { + pos += token_rel + 9 + } continue } html_rel = index(rest, "<!--") 
- cmt_rel = index(rest, "CMT:") + classic_start_rel = index(rest, "CMT:") + cmt_tag_start_rel = index(rest, "<cmt>") + comment_tag_start_rel = index(rest, "<comment>") token_rel = 0 token_type = "" @@ -342,9 +433,17 @@ scan_sections() { token_rel = html_rel token_type = "html" } - if (cmt_rel > 0 && (token_rel == 0 || cmt_rel < token_rel)) { - token_rel = cmt_rel - token_type = "cmt" + if (classic_start_rel > 0 && (token_rel == 0 || classic_start_rel < token_rel)) { + token_rel = classic_start_rel + token_type = "classic_start" + } + if (cmt_tag_start_rel > 0 && (token_rel == 0 || cmt_tag_start_rel < token_rel)) { + token_rel = cmt_tag_start_rel + token_type = "cmt_tag_start" + } + if (comment_tag_start_rel > 0 && (token_rel == 0 || comment_tag_start_rel < token_rel)) { + token_rel = comment_tag_start_rel + token_type = "comment_tag_start" } if (token_rel == 0) { @@ -361,7 +460,13 @@ scan_sections() { } in_cmt = 1 - pos += token_rel + 3 + if (token_type == "classic_start") { + pos += token_rel + 3 + } else if (token_type == "cmt_tag_start") { + pos += token_rel + 4 + } else if (token_type == "comment_tag_start") { + pos += token_rel + 8 + } } visible = trim(visible) @@ -481,20 +586,22 @@ if [[ ! -s "$INPUT_FILE" ]]; then exit 2 fi -# Check 3: Input file has at least one valid, non-empty CMT:/ENDCMT block +# Check 3: Input file has at least one valid, non-empty comment block CMT_SCAN_OUTPUT="" if ! CMT_SCAN_OUTPUT=$(scan_cmt_blocks "$INPUT_FILE" 2>&1); then - echo "VALIDATION_ERROR: INVALID_CMT_BLOCKS" + echo "VALIDATION_ERROR: INVALID_COMMENT_BLOCKS" echo "$CMT_SCAN_OUTPUT" - echo "Please fix malformed CMT:/ENDCMT blocks before running refine-plan." + echo "Please fix malformed comment blocks before running refine-plan." 
+ echo "Supported formats: CMT:/ENDCMT, <cmt></cmt>, <comment></comment>" exit 3 fi CMT_BLOCK_COUNT=$(printf '%s' "$CMT_SCAN_OUTPUT" | tr -d '[:space:]') if [[ "$CMT_BLOCK_COUNT" -eq 0 ]]; then - echo "VALIDATION_ERROR: NO_CMT_BLOCKS" - echo "The input file has no valid non-empty CMT:/ENDCMT blocks after parsing: $INPUT_FILE" + echo "VALIDATION_ERROR: NO_COMMENT_BLOCKS" + echo "The input file has no valid non-empty comment blocks after parsing: $INPUT_FILE" echo "Markers inside HTML comments or fenced code are ignored, and empty blocks do not count." + echo "Supported formats: CMT:/ENDCMT, <cmt></cmt>, <comment></comment>" exit 3 fi @@ -573,7 +680,7 @@ fi # All checks passed INPUT_LINE_COUNT=$(wc -l < "$INPUT_FILE" | tr -d ' ') echo "VALIDATION_SUCCESS" -echo "Input file: $INPUT_FILE ($INPUT_LINE_COUNT lines, $CMT_BLOCK_COUNT CMT blocks)" +echo "Input file: $INPUT_FILE ($INPUT_LINE_COUNT lines, $CMT_BLOCK_COUNT comment blocks)" echo "Output target: $OUTPUT_FILE" if [[ "$OUTPUT_FILE" == "$INPUT_FILE" ]]; then echo "Mode: in-place (atomic write with temp file)" diff --git a/skills/ask-codex/SKILL.md b/skills/ask-codex/SKILL.md index 486e8fcb..a09a6b0b 100644 --- a/skills/ask-codex/SKILL.md +++ b/skills/ask-codex/SKILL.md @@ -24,7 +24,7 @@ If the user supplied flags such as `--codex-model` or `--codex-timeout`, reconst Example: ```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" --codex-model gpt-5.4:high "Review the following round summary (M4)..." +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" --codex-model gpt-5.5:high "Review the following round summary (M4)..." 
``` Never run this unsafe form: @@ -53,4 +53,4 @@ because the shell will re-parse the question text and can fail before `ask-codex ## Notes - The response is saved to `.humanize/skill/<timestamp>/output.md` for reference -- Default model is `gpt-5.4:high` with a 3600-second timeout +- Default model is `gpt-5.5:high` with a 3600-second timeout diff --git a/skills/ask-gemini/SKILL.md b/skills/ask-gemini/SKILL.md new file mode 100644 index 00000000..e31cdd57 --- /dev/null +++ b/skills/ask-gemini/SKILL.md @@ -0,0 +1,61 @@ +--- +name: ask-gemini +description: Consult Gemini as an independent expert with deep web research. Sends a question or task to Gemini CLI and returns a research-backed response. +argument-hint: "[--gemini-model MODEL] [--gemini-timeout SECONDS] [question or task]" +allowed-tools: "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh:*)" +--- + +# Ask Gemini + +Send a question or task to Gemini and return a research-backed response. +Gemini is always instructed to perform web research via Google Search, +making this ideal for deep-research tasks that benefit from up-to-date +internet information. + +## How to Use + +Do not pass free-form user text to the shell unquoted. The question or task may contain spaces or shell metacharacters such as `(`, `)`, `;`, `#`, `*`, or `[`. + +If the user only supplied a question or task, execute: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" "$ARGUMENTS" +``` + +If the user supplied flags such as `--gemini-model` or `--gemini-timeout`, reconstruct the command so those flags remain separate shell arguments and the remaining free-form question is passed as one quoted final argument. + +Example: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" --gemini-model gemini-2.5-pro "What are the latest Rust async runtime benchmarks?" 
+``` + +Never run this unsafe form: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" $ARGUMENTS +``` + +because the shell will re-parse the question text and can fail before `ask-gemini.sh` starts. + +## Interpreting Output + +- The script outputs Gemini's response to **stdout** and status info to **stderr** +- Read the stdout output carefully and incorporate Gemini's response into your answer +- Gemini's responses are research-backed with web sources; relay source citations when available +- If the script exits with a non-zero code, report the error to the user + +## Error Handling + +| Exit Code | Meaning | +|-----------|---------| +| 0 | Success - Gemini response is in stdout | +| 1 | Validation error (missing gemini, empty question, invalid flags) | +| 124 | Timeout - suggest using `--gemini-timeout` with a larger value | +| Other | Gemini process error - report the exit code and any stderr output | + +## Notes + +- The response is saved to `.humanize/skill/<timestamp>/output.md` for reference +- Default model is `gemini-3.1-pro-preview` with a 3600-second timeout +- Gemini is always instructed to perform Google Search for up-to-date information diff --git a/skills/humanize-rlcr/SKILL.md b/skills/humanize-rlcr/SKILL.md index e65a05b6..6166e1b7 100644 --- a/skills/humanize-rlcr/SKILL.md +++ b/skills/humanize-rlcr/SKILL.md @@ -1,21 +1,15 @@ --- name: humanize-rlcr -description: Start RLCR (Ralph-Loop with Codex Review) with hook-equivalent enforcement from skill mode by reusing the existing stop-hook logic. +description: Start RLCR (Ralph-Loop with Codex Review) on Codex using the native Stop hook. type: flow user-invocable: false disable-model-invocation: true --- -# Humanize RLCR Loop (Hook-Equivalent) +# Humanize RLCR Loop -Use this flow to run RLCR in environments without native hooks. -Do not re-implement review logic manually. 
Always call the RLCR stop gate wrapper: - -```bash -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" -``` - -The wrapper executes `hooks/loop-codex-stop-hook.sh`, so skill-mode behavior stays aligned with hook-mode behavior. +Use this flow as the Codex entrypoint for RLCR. +Codex installs of Humanize require native hooks support and install the Humanize `Stop` hooks automatically. ## Runtime Root @@ -49,24 +43,13 @@ For each round: 4. Write required summary file: - Normal phase: `.humanize/rlcr/<timestamp>/round-<N>-summary.md` - Finalize phase: `.humanize/rlcr/<timestamp>/finalize-summary.md` -5. Run gate command: - -```bash -GATE_CMD=("{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh") -[[ -n "${CLAUDE_SESSION_ID:-}" ]] && GATE_CMD+=(--session-id "$CLAUDE_SESSION_ID") -[[ -n "${CLAUDE_TRANSCRIPT_PATH:-}" ]] && GATE_CMD+=(--transcript-path "$CLAUDE_TRANSCRIPT_PATH") -"${GATE_CMD[@]}" -GATE_EXIT=$? -``` - -6. Handle gate result: - - `0`: loop is allowed to exit (done). - - `10`: blocked by RLCR logic. Follow returned instructions exactly, continue next round. - - `20`: infrastructure error (wrapper/hook/runtime). Report error, do not fake completion. +5. Stop or exit normally. +6. Let the native Humanize `Stop` hook run automatically. +7. If the hook blocks exit, follow the returned instructions exactly and continue the next round. ## What This Enforces -By routing through the stop-hook logic, this skill enforces: +The native Stop-hook path enforces: - state/schema validation (`current_round`, `max_iterations`, `review_started`, `base_branch`, etc.) - branch consistency checks @@ -86,8 +69,8 @@ By routing through the stop-hook logic, this skill enforces: ## Critical Rules 1. Never manually edit `state.md` or `finalize-state.md`. -2. Never skip a blocked gate result by declaring completion manually. -3. Never run ad-hoc `codex exec` / `codex review` in place of the gate for phase transitions. +2. 
Never skip a blocked hook result by declaring completion manually. +3. Never run ad-hoc `codex exec` / `codex review` in place of the hook-managed phase transitions. 4. Always use files generated by the loop (`round-*-prompt.md`, `round-*-review-result.md`) as source of truth. ## Options @@ -100,7 +83,7 @@ Pass these through `setup-rlcr-loop.sh`: | `--plan-file <path>` | Explicit plan path | - | | `--track-plan-file` | Enforce tracked plan immutability | false | | `--max N` | Maximum iterations | 42 | -| `--codex-model MODEL:EFFORT` | Codex model and effort for `codex exec` | gpt-5.4:high | +| `--codex-model MODEL:EFFORT` | Codex model and effort for `codex exec` | gpt-5.5:high | | `--codex-timeout SECONDS` | Codex timeout | 5400 | | `--base-branch BRANCH` | Base for review phase | auto-detect | | `--full-review-round N` | Full alignment interval | 5 | @@ -111,7 +94,7 @@ Pass these through `setup-rlcr-loop.sh`: | `--yolo` | Skip quiz and enable --claude-answer-codex | false | | `--skip-quiz` | Skip Plan Understanding Quiz (implicit in skill mode) | false | -Review phase `codex review` runs with `gpt-5.4:high`. +Review phase `codex review` runs with `gpt-5.5:high`. ## Usage @@ -121,9 +104,6 @@ Review phase `codex review` runs with `gpt-5.4:high`. # Review-only mode /flow:humanize-rlcr --skip-impl - -# Load skill without auto-execution -/skill:humanize-rlcr ``` ## Cancel diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index 1b916306..558e7e1d 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -1,6 +1,6 @@ --- name: humanize -description: Iterative development with AI review. Provides RLCR (Ralph-Loop with Codex Review) for implementation planning and code review loops, plus PR review automation with bot monitoring. +description: Iterative development with AI review. Provides RLCR (Ralph-Loop with Codex Review) for implementation planning and code review loops. 
user-invocable: false disable-model-invocation: true --- @@ -45,21 +45,9 @@ The RLCR (Ralph-Loop with Codex Review) loop has two phases: - Issues marked with `[P0-9]` severity markers - If issues found → AI fixes them and continues - If no issues → loop completes with Finalize Phase -- In skill mode, always run `{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh` to enforce hook-equivalent transitions and blocking +- On Codex CLI `0.114.0+` with `codex_hooks` enabled, Humanize installs a native `Stop` hook so exit gating runs automatically -### 2. PR Loop - Automated PR Review Handling - -Automates handling of GitHub PR reviews from remote bots: - -1. Detects the PR associated with the current branch -2. Fetches review comments from specified bot(s) (`--claude` and/or `--codex`) -3. AI analyzes and fixes issues identified by the bot(s) -4. Pushes changes and triggers re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout per bot) -6. Local Codex validates if remote concerns are resolved -7. Loop continues until all bots approve or max iterations reached - -### 3. Generate Plan - Structured Plan from Draft +### 2. Generate Plan - Structured Plan from Draft Transforms a rough draft document into a structured implementation plan with: - Clear goal description @@ -80,15 +68,12 @@ Transforms a rough draft document into a structured implementation plan with: "{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-rlcr-loop.sh" --skip-impl ``` -```bash -# For each round, run the RLCR gate (required) -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" -``` +After each round, write the required summary and stop/exit normally. Humanize's native Codex `Stop` hook handles review gating automatically. 
**Common Options:** - `--max N` - Maximum iterations before auto-stop (default: 42) -- `--codex-model MODEL:EFFORT` - Codex model and reasoning effort for `codex exec` (default: gpt-5.4:high) -- Review phase `codex review` uses `gpt-5.4:high` +- `--codex-model MODEL:EFFORT` - Codex model and reasoning effort for `codex exec` (default: gpt-5.5:high) +- Review phase `codex review` uses `gpt-5.5:high` - `--codex-timeout SECONDS` - Timeout for each Codex review (default: 5400) - `--base-branch BRANCH` - Base branch for code review (auto-detects if not specified) - `--full-review-round N` - Interval for full alignment checks (default: 5) @@ -99,6 +84,7 @@ Transforms a rough draft document into a structured implementation plan with: - `--agent-teams` - Enable Agent Teams mode - `--yolo` - Skip Plan Understanding Quiz and enable --claude-answer-codex - `--skip-quiz` - Skip the Plan Understanding Quiz only +- `--privacy` - Disable methodology analysis at loop exit (default: analysis enabled) ### Cancel RLCR Loop @@ -108,30 +94,6 @@ Transforms a rough draft document into a structured implementation plan with: "{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-rlcr-loop.sh" --force ``` -### Start PR Loop - -```bash -# Monitor claude[bot] reviews -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --claude - -# Monitor chatgpt-codex-connector[bot] reviews -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --codex - -# Monitor both -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --claude --codex -``` - -**Common Options:** -- `--max N` - Maximum iterations (default: 42) -- `--codex-model MODEL:EFFORT` - Codex model for validation (default: gpt-5.4:medium) -- `--codex-timeout SECONDS` - Timeout for Codex validation (default: 900) - -### Cancel PR Loop - -```bash -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-pr-loop.sh" -``` - ### Generate Plan from Draft ```bash @@ -207,13 +169,13 @@ The RLCR loop uses a Goal Tracker to prevent goal drift: 2. 
**Maintain Goal Tracker**: Keep goal-tracker.md up-to-date with progress 3. **Be thorough**: Include details about implementation, files changed, tests added 4. **No cheating**: Don't try to exit by editing state files or running cancel commands -5. **Run stop gate each round**: Use `scripts/rlcr-stop-gate.sh` instead of manual phase control +5. **Use the native Stop hook on Codex**: After writing the required summary, stop/exit normally so Codex runs the Humanize Stop hook 6. **Trust the process**: External review helps improve implementation quality ## Prerequisites - `codex` - OpenAI Codex CLI (for review) -- `gh` - GitHub CLI (for PR loop) + ## Directory Structure @@ -229,11 +191,10 @@ Humanize stores all data in `.humanize/`: │ ├── round-N-review-result.md │ ├── finalize-state.md │ ├── finalize-summary.md +│ ├── methodology-analysis-state.md +│ ├── methodology-analysis-report.md +│ ├── methodology-analysis-done.md │ └── complete-state.md -├── pr-loop/ # PR loop data -│ └── <timestamp>/ -│ ├── state.md -│ └── resolution-N.md └── skill/ # One-shot skill results └── <timestamp>/ ├── input.md @@ -248,7 +209,6 @@ Use the monitor script to track loop progress: ```bash source "{{HUMANIZE_RUNTIME_ROOT}}/scripts/humanize.sh" humanize monitor rlcr # Monitor RLCR loop -humanize monitor pr # Monitor PR loop ``` ## Exit Codes diff --git a/tests/manual-monitor-test.sh b/tests/manual-monitor-test.sh index 5a76cd54..bb586f47 100644 --- a/tests/manual-monitor-test.sh +++ b/tests/manual-monitor-test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Manual Test Script for tests # diff --git a/tests/mocks/gh b/tests/mocks/gh index 5cf767d1..7d4dff06 100755 --- a/tests/mocks/gh +++ b/tests/mocks/gh @@ -1,6 +1,6 @@ -#!/bin/bash +#!/usr/bin/env bash # -# Mock gh CLI for testing PR loop functionality +# Mock gh CLI for testing GitHub API interactions # # This mock intercepts gh commands and returns predefined responses # based on environment variables and fixture files. 
diff --git a/tests/robustness/test-base-branch-detection.sh b/tests/robustness/test-base-branch-detection.sh index 92ce1825..bb31cd3a 100755 --- a/tests/robustness/test-base-branch-detection.sh +++ b/tests/robustness/test-base-branch-detection.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for base branch auto-detection # diff --git a/tests/robustness/test-cancel-security-robustness.sh b/tests/robustness/test-cancel-security-robustness.sh index 524cc19e..7ecc3c3c 100755 --- a/tests/robustness/test-cancel-security-robustness.sh +++ b/tests/robustness/test-cancel-security-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for cancel operation security # diff --git a/tests/robustness/test-concurrent-state-robustness.sh b/tests/robustness/test-concurrent-state-robustness.sh index 74ae84f6..57115bcf 100755 --- a/tests/robustness/test-concurrent-state-robustness.sh +++ b/tests/robustness/test-concurrent-state-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for concurrent state access # @@ -386,61 +386,6 @@ else fail "Unicode content" "2" "$ROUND" fi -# ======================================== -# PR Loop State Tests -# ======================================== - -echo "" -echo "--- PR Loop State Tests ---" -echo "" - -# Test 16: find_active_pr_loop works correctly -echo "Test 16: find_active_pr_loop detection" -mkdir -p "$TEST_DIR/pr-loops/pr-loop/2026-01-19_12-00-00" -cat > "$TEST_DIR/pr-loops/pr-loop/2026-01-19_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - -ACTIVE=$(find_active_pr_loop "$TEST_DIR/pr-loops/pr-loop" 2>/dev/null || echo "") -if [[ "$ACTIVE" == *"2026-01-19"* ]]; then - pass "find_active_pr_loop works correctly" -else - fail "find_active_pr_loop" "*2026-01-19*" "$ACTIVE" -fi - -# Test 17: PR loop state with YAML list for active_bots -echo "" -echo "Test 17: PR loop state with YAML list" -mkdir -p 
"$TEST_DIR/pr-yaml" -cat > "$TEST_DIR/pr-yaml/state.md" << 'EOF' ---- -current_round: 1 -active_bots: - - claude - - codex -configured_bots: - - claude - - codex ---- -EOF - -# Test that we can read the state file without errors -if [[ -f "$TEST_DIR/pr-yaml/state.md" ]]; then - # Check if file contains expected YAML structure - if grep -q "^ - claude$" "$TEST_DIR/pr-yaml/state.md" && \ - grep -q "^ - codex$" "$TEST_DIR/pr-yaml/state.md"; then - pass "PR loop YAML list format validated" - else - fail "YAML list format" "list items" "missing" - fi -else - fail "YAML list" "file exists" "file not found" -fi - # ======================================== # Stale Loop Detection Tests # ======================================== diff --git a/tests/robustness/test-git-operations-robustness.sh b/tests/robustness/test-git-operations-robustness.sh index d409c0ed..a80114c8 100755 --- a/tests/robustness/test-git-operations-robustness.sh +++ b/tests/robustness/test-git-operations-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for git operation scripts # diff --git a/tests/robustness/test-goal-tracker-robustness.sh b/tests/robustness/test-goal-tracker-robustness.sh index fe4c025b..de642f0b 100755 --- a/tests/robustness/test-goal-tracker-robustness.sh +++ b/tests/robustness/test-goal-tracker-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for goal tracker parsing # @@ -50,6 +50,16 @@ parse_result() { esac } +parse_issue_result() { + local result="$1" + local field="$2" + case "$field" in + blocking_issues) echo "$result" | cut -d'|' -f1 ;; + queued_issues) echo "$result" | cut -d'|' -f2 ;; + open_issues) echo "$result" | cut -d'|' -f3 ;; + esac +} + # ======================================== # Positive Tests - Valid Goal Tracker # ======================================== @@ -438,6 +448,55 @@ else fail "Deferred tasks count" "2" "$DEFERRED_TASKS" fi +# Test 15b: Distinguish blocking vs queued issues in new 
schema +echo "" +echo "Test 15b: Distinguish blocking vs queued issues" +cat > "$TEST_DIR/goal-tracker-issue-breakdown.md" << 'EOF' +# Goal Tracker + +### Acceptance Criteria + +- AC-1: Test + +--- + +### Blocking Side Issues + +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| +| Failing review item | 2 | AC-1 | Fix immediately | + +### Queued Side Issues + +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +| Cleanup follow-up | 2 | Cosmetic only | Next refactor | +| Extra test hardening | 3 | Current AC already met | Regression appears | +EOF + +ISSUE_RESULT=$(humanize_parse_goal_tracker_issue_counts "$TEST_DIR/goal-tracker-issue-breakdown.md") +BLOCKING_ISSUES=$(parse_issue_result "$ISSUE_RESULT" blocking_issues) +QUEUED_ISSUES=$(parse_issue_result "$ISSUE_RESULT" queued_issues) +OPEN_ISSUES=$(parse_issue_result "$ISSUE_RESULT" open_issues) +if [[ "$BLOCKING_ISSUES" == "1" ]] && [[ "$QUEUED_ISSUES" == "2" ]] && [[ "$OPEN_ISSUES" == "3" ]]; then + pass "Separates blocking and queued issues in new schema" +else + fail "Issue breakdown" "1 blocking, 2 queued, 3 total" "$ISSUE_RESULT" +fi + +# Test 15c: Legacy open issues fallback maps to blocking count +echo "" +echo "Test 15c: Legacy open issues fallback maps to blocking count" +ISSUE_RESULT=$(humanize_parse_goal_tracker_issue_counts "$TEST_DIR/goal-tracker-issues.md") +BLOCKING_ISSUES=$(parse_issue_result "$ISSUE_RESULT" blocking_issues) +QUEUED_ISSUES=$(parse_issue_result "$ISSUE_RESULT" queued_issues) +OPEN_ISSUES=$(parse_issue_result "$ISSUE_RESULT" open_issues) +if [[ "$BLOCKING_ISSUES" == "2" ]] && [[ "$QUEUED_ISSUES" == "0" ]] && [[ "$OPEN_ISSUES" == "2" ]]; then + pass "Legacy open issues fallback treated as blocking" +else + fail "Legacy issue fallback" "2 blocking, 0 queued, 2 total" "$ISSUE_RESULT" +fi + # Test 16: File with only headers (no content) 
echo "" echo "Test 16: File with only section headers" diff --git a/tests/robustness/test-hook-input-robustness.sh b/tests/robustness/test-hook-input-robustness.sh index 6a6c77dd..675ff887 100755 --- a/tests/robustness/test-hook-input-robustness.sh +++ b/tests/robustness/test-hook-input-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for hook input parsing and monitor edge cases # @@ -453,7 +453,7 @@ cd "$MONITOR_TEST_DIR/project" # Create monitor runner script cat > "$MONITOR_TEST_DIR/run_monitor.sh" << 'MONITOR_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" @@ -512,7 +512,7 @@ echo "Test log" > "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED/2026-01-17_10-0 # Create narrow terminal runner - calls _humanize_monitor_codex directly in same shell cat > "$MONITOR_TEST_DIR/run_narrow.sh" << 'NARROW_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" @@ -603,7 +603,7 @@ mkdir -p "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED3/2026-01-17_11-00-00" printf '\033[31mRed text\033[0m\n\033[1;32mBold green\033[0m\n' > "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED3/2026-01-17_11-00-00/round-1-codex-run.log" cat > "$MONITOR_TEST_DIR/run_ansi.sh" << 'ANSI_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 8f302bb4..1d4a21f5 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for all hook scripts # @@ -6,7 +6,6 @@ # - loop-edit-validator.sh # - loop-plan-file-validator.sh # - loop-codex-stop-hook.sh (state parsing) -# - pr-loop-stop-hook.sh (state parsing) # # Focus areas: # - JSON input validation edge cases @@ -317,8 +316,10 @@ echo "" # Test 12: Bash validator blocks state.md 
modification attempts echo "Test 12: Bash validator blocks state.md modification" # Create RLCR state for the test -mkdir -p "$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00" -cat > "$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00/state.md" << 'EOF' +HOOK_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00" +OLD_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2026-01-19_11-00-00" +mkdir -p "$HOOK_LOOP_DIR" +cat > "$HOOK_LOOP_DIR/state.md" << 'EOF' --- current_round: 1 max_iterations: 42 @@ -333,6 +334,54 @@ review_started: false plan_tracked: false --- EOF +cat > "$HOOK_LOOP_DIR/goal-tracker.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Keep mainline aligned. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Keep AC-1 moving | AC-1 | pending | - | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +EOF +mkdir -p "$OLD_LOOP_DIR" +cat > "$OLD_LOOP_DIR/goal-tracker.md" << 'EOF' +# Old Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Old session tracker. + +### Acceptance Criteria +- AC-1: Old session only. 
+ +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 0) +EOF # Try to modify state.md - this SHOULD be blocked JSON='{"tool_name":"Bash","tool_input":{"command":"echo hacked >> '"$TEST_DIR"'/.humanize/rlcr/2026-01-19_12-00-00/state.md"}}' set +e @@ -366,9 +415,143 @@ else fail "Goal-tracker.md modification" "exit 2 (blocked)" "exit $EXIT_CODE, result: $RESULT" fi -# Test 12c: Unrelated dangerous commands are allowed through (sandbox handles security) +# Test 12c: Write validator allows mutable goal-tracker updates after round 0 +echo "" +echo "Test 12c: Write validator allows mutable goal-tracker updates after round 0" +cat > "$TEST_DIR/goal-tracker-updated.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Keep mainline aligned. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| +| failing test for AC-1 | 1 | AC-1 | fix before exit | + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +EOF +UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-updated.md") +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write allows mutable goal-tracker updates after round 0" +else + fail "Goal-tracker mutable write" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12d: Write validator blocks immutable goal-tracker changes after round 0 echo "" -echo "Test 12c: Unrelated dangerous commands allowed through (sandbox responsibility)" +echo "Test 12d: Write validator blocks immutable goal-tracker changes after round 0" +cat > "$TEST_DIR/goal-tracker-bad.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Change the goal entirely. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) +EOF +UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-bad.md") +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Write blocks immutable goal-tracker changes after round 0" +else + fail "Goal-tracker immutable write" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12e: Edit validator allows mutable goal-tracker edits after round 0 +echo "" +echo "Test 12e: Edit validator allows mutable goal-tracker edits after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":"| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored |"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows mutable goal-tracker updates after round 0" +else + fail "Goal-tracker mutable edit" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12ea: Edit validator allows mutable goal-tracker deletions after round 0 +echo "" +echo "Test 12ea: Edit validator allows mutable deletions after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":""}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows mutable goal-tracker deletions after round 0" +else + fail "Goal-tracker mutable delete" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12f: Edit validator blocks immutable goal-tracker edits after round 0 +echo "" +echo "Test 12f: Edit validator blocks immutable goal-tracker edits after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"Keep mainline aligned.","new_string":"Change the goal entirely."}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Edit blocks immutable goal-tracker updates after round 0" +else + fail "Goal-tracker immutable edit" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12g: Read validator blocks old-session goal tracker +echo "" +echo "Test 12g: Read validator blocks old-session goal tracker" +JSON='{"tool_name":"Read","tool_input":{"file_path":"'"$OLD_LOOP_DIR"'/goal-tracker.md"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$?
+set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Read blocks old-session goal-tracker.md" +else + fail "Goal-tracker old-session read" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12h: Unrelated dangerous commands are allowed through (sandbox handles security) +echo "" +echo "Test 12h: Unrelated dangerous commands allowed through (sandbox responsibility)" JSON='{"tool_name":"Bash","tool_input":{"command":"cat /tmp/test; rm -rf /"}}' set +e RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) @@ -474,22 +657,6 @@ else fail "Missing state handling" "exit 0, no block decision" "exit=$EXIT_CODE, output=$OUTPUT" fi -# Test 17: PR stop hook handles missing state gracefully (allows exit) -echo "" -echo "Test 17: PR stop hook allows exit when no state directory" -mkdir -p "$TEST_DIR/no-pr-state" - -set +e -OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/no-pr-state" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) -EXIT_CODE=$? -set -e -# Should exit 0, no block decision -if [[ $EXIT_CODE -eq 0 ]] && ! echo "$OUTPUT" | grep -q '"decision".*:.*"block"'; then - pass "PR stop hook allows exit when no state (no block decision)" -else - fail "PR missing state" "exit 0, no block decision" "exit=$EXIT_CODE" -fi - # Test 18: Stop hook with corrupted state file outputs block decision echo "" echo "Test 18: Stop hook with corrupted state outputs decision" @@ -592,7 +759,7 @@ cd - > /dev/null # Create mock codex to avoid real API calls (review_started: false triggers codex exec) mkdir -p "$TEST_DIR/mock-bin" cat > "$TEST_DIR/mock-bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex that returns review output indicating work continues echo "Review: Code looks good but more testing needed." echo "No COMPLETE or STOP markers - work should continue." 
diff --git a/tests/robustness/test-path-validation-robustness.sh b/tests/robustness/test-path-validation-robustness.sh index be7a916e..56b98528 100755 --- a/tests/robustness/test-path-validation-robustness.sh +++ b/tests/robustness/test-path-validation-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for path validation # @@ -22,7 +22,7 @@ setup_test_dir setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-path-validation-robustness.sh echo "Mock codex output" exit 0 diff --git a/tests/robustness/test-plan-file-robustness.sh b/tests/robustness/test-plan-file-robustness.sh index 7ee5f186..d2f5ee7f 100755 --- a/tests/robustness/test-plan-file-robustness.sh +++ b/tests/robustness/test-plan-file-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for plan file validation # @@ -27,7 +27,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-robustness.sh echo "Mock codex output" exit 0 diff --git a/tests/robustness/test-pr-loop-api-fetch.sh b/tests/robustness/test-pr-loop-api-fetch.sh deleted file mode 100755 index e549be08..00000000 --- a/tests/robustness/test-pr-loop-api-fetch.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# -# PR Loop API fetch/state tests (parallel split 1/2) -# -# Runs Tests 1-11: PR Loop State Handling + fetch-pr-comments + -# Bot Response Parsing + JSON Edge Cases -# -# Sources the shared test library from test-pr-loop-api-robustness.sh -# and invokes the run_fetch_tests group function. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$SCRIPT_DIR/test-pr-loop-api-robustness.sh" - -run_fetch_tests -print_test_summary "PR Loop API Fetch Tests" -exit $? 
diff --git a/tests/robustness/test-pr-loop-api-poll.sh b/tests/robustness/test-pr-loop-api-poll.sh deleted file mode 100755 index 234130a6..00000000 --- a/tests/robustness/test-pr-loop-api-poll.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# -# PR Loop API poll/stop-hook tests (parallel split 2/2) -# -# Runs Tests 12-19: PR Loop Stop Hook + poll-pr-reviews -# -# Sources the shared test library from test-pr-loop-api-robustness.sh -# and invokes the run_poll_tests group function. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$SCRIPT_DIR/test-pr-loop-api-robustness.sh" - -run_poll_tests -print_test_summary "PR Loop API Poll Tests" -exit $? diff --git a/tests/robustness/test-pr-loop-api-robustness.sh b/tests/robustness/test-pr-loop-api-robustness.sh deleted file mode 100755 index 20b5c165..00000000 --- a/tests/robustness/test-pr-loop-api-robustness.sh +++ /dev/null @@ -1,866 +0,0 @@ -#!/bin/bash -# -# Robustness tests for PR loop API handling -# -# Tests PR loop behavior under API error conditions by invoking actual -# PR loop scripts with mocked gh commands: -# - API failure handling -# - Rate limiting responses -# - Bot response JSON parsing -# - Network error simulation -# - PR loop state file handling -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" -source "$PROJECT_ROOT/hooks/lib/loop-common.sh" -source "$SCRIPT_DIR/../test-helpers.sh" - -setup_test_dir - -echo "========================================" -echo "PR Loop API Robustness Tests" -echo "========================================" -echo "" - -# ======================================== -# Helper Functions -# ======================================== - -# Create a comprehensive mock gh that handles repo view, pr view, and api calls -# This allows fetch-pr-comments.sh to run end-to-end -create_mock_gh() { - local dir="$1" - local behavior="$2" # "empty_array", "rate_limit", "network_error", "bot_comments", etc. - mkdir -p "$dir/bin" - - # Base mock that handles repo view and pr view for all behaviors - # Note: gh CLI applies -q jq queries internally, so we output the final result - # fetch-pr-comments.sh uses: gh repo view --json owner,name -q '...' - # gh pr view PR --repo REPO --json number -q .number - cat > "$dir/bin/gh" << 'GHEOF_START' -#!/bin/bash -# Mock gh command for testing - -# Check for -q flag anywhere in args (jq query) -HAS_Q_FLAG=false -for arg in "$@"; do - if [[ "$arg" == "-q" ]]; then - HAS_Q_FLAG=true - break - fi -done - -# Handle repo view (required by fetch-pr-comments.sh) -if [[ "$1" == "repo" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - # -q query extracts owner.login + "/" + name - echo "testowner/testrepo" - else - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - fi - exit 0 - elif [[ "$*" == *"parent"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - # parent query returns empty/null for non-fork - echo "/" - else - echo '{"parent":null}' - fi - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi - -# Handle pr view (required by fetch-pr-comments.sh) -# PR existence check uses: gh pr view --repo REPO --json number -q .number -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* 
]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - echo "PR #123" - exit 0 -fi - -# Handle api calls based on behavior -GHEOF_START - - # Add behavior-specific api handling - case "$behavior" in - empty_array) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - rate_limit) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo '{"message":"API rate limit exceeded","documentation_url":"https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting"}' >&2 - exit 1 -fi -echo "[]" -exit 0 -GHEOF - ;; - network_error) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "Connection refused" >&2 - exit 6 -fi -echo "[]" -exit 0 -GHEOF - ;; - auth_failure) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "auth" && "$2" == "status" ]]; then - echo "You are not logged into any GitHub hosts" >&2 - exit 1 -fi -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - claude_approval) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return Claude bot approval for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"claude[bot]","type":"Bot"},"body":"LGTM! 
The implementation looks good.","created_at":"2026-01-19T12:00:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - codex_issues) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return Codex bot with issues for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"chatgpt-codex-connector[bot]","type":"Bot"},"body":"[P1] Critical issue found\n[P2] Minor issue","created_at":"2026-01-19T12:00:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - mixed_bots) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return mixed bot responses for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"claude[bot]","type":"Bot"},"body":"LGTM","created_at":"2026-01-19T12:00:00Z"},{"id":2,"user":{"login":"chatgpt-codex-connector[bot]","type":"Bot"},"body":"Approved","created_at":"2026-01-19T12:01:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - unicode_comment) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - printf '[{"id":1,"user":{"login":"bot","type":"Bot"},"body":"Good work! 
\u2705 \u2728","created_at":"2026-01-19T12:00:00Z"}]\n' - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - long_comment) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Generate a long comment body - LONG_BODY=$(head -c 10000 /dev/zero 2>/dev/null | tr '\0' 'a' || printf 'a%.0s' {1..10000}) - echo "[{\"id\":1,\"user\":{\"login\":\"bot\",\"type\":\"Bot\"},\"body\":\"$LONG_BODY\",\"created_at\":\"2026-01-19T12:00:00Z\"}]" - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - *) - # Default: return empty array for api calls - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - esac - chmod +x "$dir/bin/gh" -} - -create_pr_loop_state() { - local dir="$1" - local round="${2:-0}" - mkdir -p "$dir/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$dir/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << EOF ---- -current_round: $round -max_iterations: 42 -pr_number: 123 -pr_owner: testowner -pr_repo: testrepo -base_branch: main -configured_bots: - - claude - - codex -active_bots: - - claude -startup_case: 3 -review_started: false ---- -EOF -} - -init_basic_git_repo() { - local dir="$1" - cd "$dir" - git init -q - git config user.email "test@test.com" - git config user.name "Test User" - git config commit.gpgsign false - git checkout -q -b main 2>/dev/null || git checkout -q main - echo "initial" > file.txt - git add file.txt - git commit -q -m "Initial commit" - cd - > /dev/null -} - -# ======================================== -# Test Group Functions -# ======================================== - -# Tests 1-11: PR Loop State Handling + fetch-pr-comments + Bot Response Parsing + JSON Edge Cases -run_fetch_tests() { - - # ======================================== - # PR Loop State Handling Tests - # ======================================== - - echo "--- PR Loop State Handling Tests ---" - echo "" - - # 
Test 1: find_active_pr_loop detects PR loop state - echo "Test 1: PR loop state detection" - mkdir -p "$TEST_DIR/prloop1/.humanize/pr-loop/2026-01-19_00-00-00" - create_pr_loop_state "$TEST_DIR/prloop1" - - ACTIVE=$(find_active_pr_loop "$TEST_DIR/prloop1/.humanize/pr-loop" 2>/dev/null || echo "") - if [[ "$ACTIVE" == *"2026-01-19"* ]]; then - pass "PR loop state detected" - else - fail "PR loop detection" "*2026-01-19*" "$ACTIVE" - fi - - # Test 2: PR loop with YAML list active_bots - echo "" - echo "Test 2: PR loop with YAML list active_bots" - mkdir -p "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 456 -active_bots: - - claude - - codex -configured_bots: - - claude - - codex -base_branch: main -review_started: false ---- -EOF - - # Verify the file can be read - if grep -q "active_bots:" "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md"; then - pass "YAML list active_bots format accepted" - else - fail "YAML list format" "contains active_bots" "not found" - fi - - # Test 3: PR loop state with missing pr_number - echo "" - echo "Test 3: PR loop state with missing pr_number" - mkdir -p "$TEST_DIR/prloop3/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$TEST_DIR/prloop3/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -configured_bots: - - claude -base_branch: main -review_started: false ---- -EOF - - # Should still be detectable as an active loop - ACTIVE=$(find_active_pr_loop "$TEST_DIR/prloop3/.humanize/pr-loop" 2>/dev/null || echo "") - if [[ -n "$ACTIVE" ]]; then - pass "PR loop without pr_number still detected" - else - fail "Missing pr_number" "detected" "not detected" - fi - - # ======================================== - # fetch-pr-comments.sh Tests - # ======================================== - - echo "" - echo "--- fetch-pr-comments.sh Script 
Tests ---" - echo "" - - # Test 4: Empty JSON array handled by fetch-pr-comments - echo "Test 4: Empty PR comments creates valid output file" - mkdir -p "$TEST_DIR/fetch1" - init_basic_git_repo "$TEST_DIR/fetch1" - create_mock_gh "$TEST_DIR/fetch1" "empty_array" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch1/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - # Must succeed AND create output file with expected content - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/fetch1/comments.md" ]]; then - # Verify output contains expected structure - if grep -q "PR Comments for #123" "$TEST_DIR/fetch1/comments.md" && \ - grep -q "testowner/testrepo" "$TEST_DIR/fetch1/comments.md"; then - pass "Empty PR comments creates valid output (PR#, repo in file)" - else - fail "Empty PR output" "contains PR# and repo" "$(head -10 "$TEST_DIR/fetch1/comments.md")" - fi - else - fail "Empty PR comments" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 5: Rate limit error produces warning in output - echo "" - echo "Test 5: Rate limit error produces warning" - mkdir -p "$TEST_DIR/fetch2" - init_basic_git_repo "$TEST_DIR/fetch2" - create_mock_gh "$TEST_DIR/fetch2" "rate_limit" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch2/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - # Script may still create output file with warnings about API failures - if [[ -f "$TEST_DIR/fetch2/comments.md" ]]; then - # Check for warning about API failures - if grep -qi "warning\|failed" "$TEST_DIR/fetch2/comments.md" || echo "$OUTPUT" | grep -qi "failed\|error"; then - pass "Rate limit produces warning (exit=$EXIT_CODE)" - else - pass "Rate limit handled gracefully (exit=$EXIT_CODE)" - fi - else - # Non-zero exit without file is acceptable for API errors - if [[ $EXIT_CODE -ne 0 ]]; then - pass "Rate limit error returns non-zero exit ($EXIT_CODE)" - else - fail "Rate limit handling" "non-zero exit or warning" "exit 0, no file" - fi - fi - - # Test 6: Network error handled gracefully - echo "" - echo "Test 6: Network error handled gracefully" - mkdir -p "$TEST_DIR/fetch3" - init_basic_git_repo "$TEST_DIR/fetch3" - create_mock_gh "$TEST_DIR/fetch3" "network_error" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch3/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch3/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - # Network errors should produce non-zero exit or warning - if [[ $EXIT_CODE -ne 0 ]] || echo "$OUTPUT" | grep -qi "error\|failed\|connection"; then - pass "Network error handled (exit=$EXIT_CODE)" - else - fail "Network error handling" "non-zero exit or error message" "exit=$EXIT_CODE" - fi - - # ======================================== - # Bot Response Parsing Tests (via fetch-pr-comments.sh) - # ======================================== - - echo "" - echo "--- Bot Response Parsing Tests ---" - echo "" - - # Test 7: Claude bot comments parsed and formatted in output - echo "Test 7: Claude bot comments appear in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot1" - init_basic_git_repo "$TEST_DIR/bot1" - create_mock_gh "$TEST_DIR/bot1" "claude_approval" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot1/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot1/comments.md" ]]; then - # Verify Claude bot comment appears in formatted output - if grep -q "claude\[bot\]" "$TEST_DIR/bot1/comments.md" && grep -q "LGTM" "$TEST_DIR/bot1/comments.md"; then - pass "Claude bot comment parsed and formatted in output" - else - fail "Claude parsing" "claude[bot] and LGTM in output" "$(cat "$TEST_DIR/bot1/comments.md")" - fi - else - fail "Claude bot test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 8: Codex bot with severity markers parsed correctly - echo "" - echo "Test 8: Codex bot severity markers in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot2" - init_basic_git_repo "$TEST_DIR/bot2" - create_mock_gh "$TEST_DIR/bot2" "codex_issues" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot2/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot2/comments.md" ]]; then - # Verify Codex severity markers appear in output - if grep -q "chatgpt-codex-connector\[bot\]" "$TEST_DIR/bot2/comments.md" && grep -q "\[P1\]" "$TEST_DIR/bot2/comments.md"; then - pass "Codex severity markers parsed in output" - else - fail "Codex parsing" "[P1] marker in output" "$(cat "$TEST_DIR/bot2/comments.md")" - fi - else - fail "Codex bot test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 9: Multiple bot responses both appear in output - echo "" - echo "Test 9: Multiple bots in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot3" - init_basic_git_repo "$TEST_DIR/bot3" - create_mock_gh "$TEST_DIR/bot3" "mixed_bots" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot3/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot3/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot3/comments.md" ]]; then - # Verify both bots appear - if grep -q "claude\[bot\]" "$TEST_DIR/bot3/comments.md" && grep -q "chatgpt-codex-connector\[bot\]" "$TEST_DIR/bot3/comments.md"; then - pass "Multiple bot responses both appear in output" - else - fail "Multiple bots" "both bots in output" "$(cat "$TEST_DIR/bot3/comments.md")" - fi - else - fail "Multiple bots test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # ======================================== - # JSON Edge Cases (via fetch-pr-comments.sh) - # ======================================== - - echo "" - echo "--- JSON Edge Cases ---" - echo "" - - # Test 10: Unicode in bot comments processed through full pipeline - echo "Test 10: Unicode comments processed by fetch-pr-comments" - mkdir -p "$TEST_DIR/json1" - init_basic_git_repo "$TEST_DIR/json1" - create_mock_gh "$TEST_DIR/json1" "unicode_comment" - - set +e - OUTPUT=$(PATH="$TEST_DIR/json1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/json1/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/json1/comments.md" ]]; then - pass "Unicode comments processed successfully" - else - fail "Unicode handling" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 11: Very long comment body processed - echo "" - echo "Test 11: Long comment body processed by fetch-pr-comments" - mkdir -p "$TEST_DIR/json2" - init_basic_git_repo "$TEST_DIR/json2" - create_mock_gh "$TEST_DIR/json2" "long_comment" - - set +e - OUTPUT=$(PATH="$TEST_DIR/json2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/json2/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/json2/comments.md" ]]; then - # Verify the long content was written - FILE_SIZE=$(wc -c < "$TEST_DIR/json2/comments.md") - if [[ $FILE_SIZE -gt 1000 ]]; then - pass "Long comment body processed (file size: $FILE_SIZE bytes)" - else - pass "Long comment handled (may be truncated)" - fi - else - fail "Long body handling" "exit 0 with output file" "exit=$EXIT_CODE" - fi -} - -# Tests 12-19: PR Loop Stop Hook + poll-pr-reviews -run_poll_tests() { - - # ======================================== - # PR Loop Stop Hook Tests - # ======================================== - - echo "" - echo "--- PR Loop Stop Hook Tests ---" - echo "" - - # Test 12: Stop hook with no active PR loop - echo "Test 12: Stop hook with no active PR loop" - mkdir -p "$TEST_DIR/stop1" - init_basic_git_repo "$TEST_DIR/stop1" - - set +e - OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/stop1" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]]; then - pass "PR stop hook passes when no loop active" - else - fail "No PR loop handling" "exit 0" "exit $EXIT_CODE" - fi - - # Test 13: Stop hook with corrupted state - echo "" - echo "Test 13: Stop hook with corrupted state" - mkdir -p "$TEST_DIR/stop2/.humanize/pr-loop/2026-01-19_00-00-00" - echo "not valid yaml [[[" > "$TEST_DIR/stop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md" - init_basic_git_repo "$TEST_DIR/stop2" - - set +e - OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/stop2" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) - EXIT_CODE=$? 
- set -e - - # Should handle gracefully without crashing - if [[ $EXIT_CODE -lt 128 ]]; then - pass "Stop hook handles corrupted state (exit $EXIT_CODE)" - else - fail "Corrupted state" "exit < 128" "exit $EXIT_CODE" - fi - - # Test 14: approve-state.md directory structure - echo "" - echo "Test 14: approve-state.md directory structure" - mkdir -p "$TEST_DIR/stop3/.humanize/pr-loop/2026-01-19_00-00-00" - create_pr_loop_state "$TEST_DIR/stop3" - - # The approve-state.md path should be writable - APPROVE_PATH="$TEST_DIR/stop3/.humanize/pr-loop/2026-01-19_00-00-00/approve-state.md" - touch "$APPROVE_PATH" 2>/dev/null - if [[ -f "$APPROVE_PATH" ]]; then - pass "approve-state.md path is writable" - rm "$APPROVE_PATH" - else - fail "Approve path" "writable" "not writable" - fi - - # ======================================== - # poll-pr-reviews.sh Tests - # ======================================== - - echo "" - echo "--- poll-pr-reviews.sh Script Tests ---" - echo "" - - # Test 15: poll-pr-reviews help displays usage - echo "Test 15: poll-pr-reviews help displays usage" - set +e - OUTPUT=$("$PROJECT_ROOT/scripts/poll-pr-reviews.sh" --help 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -qi "usage\|poll"; then - pass "poll-pr-reviews help displays usage" - else - fail "poll-pr-reviews help" "exit 0 with usage" "exit=$EXIT_CODE" - fi - - # Test 16: poll-pr-reviews with missing required args - echo "" - echo "Test 16: poll-pr-reviews missing args rejected" - set +e - OUTPUT=$("$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "required\|error"; then - pass "poll-pr-reviews missing args rejected" - else - fail "poll-pr-reviews validation" "non-zero with error" "exit=$EXIT_CODE" - fi - - # Test 17: poll-pr-reviews with mocked gh returns JSON output with required fields - echo "" - echo "Test 17: poll-pr-reviews with mocked gh produces valid JSON output" - mkdir -p "$TEST_DIR/poll1" - init_basic_git_repo "$TEST_DIR/poll1" - create_mock_gh "$TEST_DIR/poll1" "claude_approval" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll1/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # poll-pr-reviews must output JSON with has_new_comments and parse correctly - if [[ $EXIT_CODE -eq 0 ]]; then - # Parse JSON to verify structure - HAS_NEW=$(echo "$OUTPUT" | jq -r '.has_new_comments // empty' 2>/dev/null || echo "") - if [[ -n "$HAS_NEW" ]] && [[ "$HAS_NEW" == "true" || "$HAS_NEW" == "false" ]]; then - # Also verify comments array exists (may be empty) - COMMENTS_TYPE=$(echo "$OUTPUT" | jq -r '.comments | type' 2>/dev/null || echo "") - if [[ "$COMMENTS_TYPE" == "array" ]]; then - pass "poll-pr-reviews produces valid JSON (has_new_comments=$HAS_NEW, comments is array)" - else - pass "poll-pr-reviews produces JSON with has_new_comments=$HAS_NEW" - fi - else - fail "poll-pr-reviews JSON" "has_new_comments boolean" "output missing or invalid: $OUTPUT" - fi - else - fail "poll-pr-reviews execution" "exit 0" "exit=$EXIT_CODE, output=$OUTPUT" - fi - - # Test 18: poll-pr-reviews timeout handling with slow mock - echo "" - echo "Test 18: poll-pr-reviews handles slow API gracefully" - mkdir -p "$TEST_DIR/poll2" - init_basic_git_repo "$TEST_DIR/poll2" - - # Create a mock gh that sleeps briefly but responds - mkdir -p "$TEST_DIR/poll2/bin" - cat > "$TEST_DIR/poll2/bin/gh" << 'GHEOF' -#!/bin/bash -# Handle repo view -if [[ "$1" == "repo" && "$2" == "view" ]]; then - 
if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - exit 0 - elif [[ "$*" == *"parent"* ]]; then - echo '{"parent":null}' - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi -# Handle pr view -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - exit 0 -fi -# Simulate slow API -if [[ "$1" == "api" ]]; then - sleep 0.5 - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - chmod +x "$TEST_DIR/poll2/bin/gh" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll2/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # Should complete without hanging and produce valid JSON (even if empty) - if [[ $EXIT_CODE -eq 0 ]]; then - # Verify JSON output with has_new_comments (API returns empty, so should be false) - HAS_NEW=$(echo "$OUTPUT" | jq -r '.has_new_comments // empty' 2>/dev/null || echo "") - if [[ "$HAS_NEW" == "false" ]]; then - pass "poll-pr-reviews handles slow API (has_new_comments=false, no comments)" - elif [[ -n "$HAS_NEW" ]]; then - pass "poll-pr-reviews handles slow API (has_new_comments=$HAS_NEW)" - else - pass "poll-pr-reviews handles slow API gracefully (exit=0)" - fi - else - fail "poll-pr-reviews timeout" "exit 0" "exit=$EXIT_CODE" - fi - - # Test 19: poll-pr-reviews with API failure returns has_new_comments:false - echo "" - echo "Test 19: poll-pr-reviews with API failure returns has_new_comments:false" - mkdir -p "$TEST_DIR/poll3" - init_basic_git_repo "$TEST_DIR/poll3" - - # Create a mock gh that fails on API calls - mkdir -p "$TEST_DIR/poll3/bin" - cat > "$TEST_DIR/poll3/bin/gh" << 'GHEOF' -#!/bin/bash -# Check for -q flag anywhere in args (jq query) -HAS_Q_FLAG=false -for arg in "$@"; do - if [[ "$arg" == "-q" ]]; then - HAS_Q_FLAG=true - break - 
fi -done - -# Handle repo view -if [[ "$1" == "repo" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - echo "testowner/testrepo" - else - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - fi - exit 0 - elif [[ "$*" == *"parent"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - echo "/" - else - echo '{"parent":null}' - fi - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi -# Handle pr view -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - exit 0 -fi -# Fail on API calls to simulate network error -if [[ "$1" == "api" ]]; then - echo "Error: Network unreachable" >&2 - exit 1 -fi -exit 0 -GHEOF - chmod +x "$TEST_DIR/poll3/bin/gh" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll3/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # On API failure, poll-pr-reviews MUST: - # 1. Exit with code 0 - # 2. Output valid JSON (parseable by jq -e) - # 3. Have has_new_comments exactly equal to false - # NO FALLBACKS - all three conditions must be met - if [[ $EXIT_CODE -ne 0 ]]; then - fail "poll-pr-reviews API failure" "exit 0" "exit=$EXIT_CODE" - else - # Extract JSON from output (warnings precede JSON, JSON may be multi-line) - # Find the line number where JSON starts (first '{') and extract from there to end - JSON_START_LINE=$(echo "$OUTPUT" | grep -n '^{' | head -1 | cut -d: -f1) - if [[ -z "$JSON_START_LINE" ]]; then - fail "poll-pr-reviews API failure" "JSON output" "no JSON found in output" - else - JSON_OUTPUT=$(echo "$OUTPUT" | tail -n +$JSON_START_LINE) - - # Validate JSON is parseable using jq -e (exits non-zero on invalid JSON) - if ! echo "$JSON_OUTPUT" | jq -e '.' 
>/dev/null 2>&1; then - fail "poll-pr-reviews API failure" "valid JSON output" "invalid JSON: $JSON_OUTPUT" - else - # Verify has_new_comments is exactly boolean false (not string "false") - # jq -e '.has_new_comments == false' returns 0 only if the value is boolean false - if echo "$JSON_OUTPUT" | jq -e '.has_new_comments == false' >/dev/null 2>&1; then - pass "poll-pr-reviews returns exit 0 with valid JSON and has_new_comments:false (boolean)" - else - # Show actual value and type for debugging - HAS_NEW_VALUE=$(echo "$JSON_OUTPUT" | jq '.has_new_comments') - HAS_NEW_TYPE=$(echo "$JSON_OUTPUT" | jq -r '.has_new_comments | type') - fail "poll-pr-reviews API failure" "has_new_comments: boolean false" "value=$HAS_NEW_VALUE type=$HAS_NEW_TYPE" - fi - fi - fi - fi -} - -# ======================================== -# Source Guard: run all tests when executed directly -# ======================================== - -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - run_fetch_tests - run_poll_tests - print_test_summary "PR Loop API Robustness Test Summary" - exit $? 
-fi diff --git a/tests/robustness/test-session-robustness.sh b/tests/robustness/test-session-robustness.sh index 7b232e9f..4bc636af 100755 --- a/tests/robustness/test-session-robustness.sh +++ b/tests/robustness/test-session-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for concurrent session handling # diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index eea2d878..4e13e5b8 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -1,8 +1,8 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for setup scripts # -# Tests setup-rlcr-loop.sh and setup-pr-loop.sh under edge cases: +# Tests setup-rlcr-loop.sh under edge cases: # - Argument parsing edge cases # - Plan file validation edge cases # - Git repository edge cases @@ -62,6 +62,23 @@ init_basic_git_repo() { cd - > /dev/null } +# Create a minimal PATH toolset in a test bin directory so scripts using +# '/usr/bin/env bash' still run even in restricted PATH scenarios. +prepare_runtime_bin() { + local bin_dir="$1" + local tool + local tool_path + + mkdir -p "$bin_dir" + + for tool in bash env git dirname cat sed awk grep mkdir date head od tr wc sort ls rm cp mv chmod ln readlink printf timeout gtimeout; do + tool_path=$(command -v "$tool" 2>/dev/null || true) + if [[ -n "$tool_path" && -x "$tool_path" && ! -e "$bin_dir/$tool" ]]; then + ln -s "$tool_path" "$bin_dir/$tool" + fi + done +} + # Run setup-rlcr-loop.sh with proper isolation from real RLCR loop # Usage: run_rlcr_setup <test_repo_dir> [args...] run_rlcr_setup() { @@ -75,17 +92,6 @@ run_rlcr_setup() { ) } -# Run setup-pr-loop.sh with proper isolation from real PR loop -# Usage: run_pr_setup <test_repo_dir> [args...] 
-run_pr_setup() { - local repo_dir="$1" - shift - ( - cd "$repo_dir" - CLAUDE_PROJECT_DIR="$repo_dir" "$PROJECT_ROOT/scripts/setup-pr-loop.sh" "$@" - ) -} - # ======================================== # Setup RLCR Loop Argument Parsing Tests # ======================================== @@ -239,7 +245,7 @@ git -C "$TEST_DIR/repo9" add .gitignore && git -C "$TEST_DIR/repo9" commit -q -m # Create mock codex mkdir -p "$TEST_DIR/repo9/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo9/bin/codex" chmod +x "$TEST_DIR/repo9/bin/codex" @@ -265,7 +271,7 @@ echo "plan.md" >> "$TEST_DIR/repo10/.gitignore" git -C "$TEST_DIR/repo10" add .gitignore && git -C "$TEST_DIR/repo10" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo10/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo10/bin/codex" chmod +x "$TEST_DIR/repo10/bin/codex" @@ -288,7 +294,7 @@ echo "path with spaces/" >> "$TEST_DIR/repo11/.gitignore" git -C "$TEST_DIR/repo11" add .gitignore && git -C "$TEST_DIR/repo11" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo11/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo11/bin/codex" chmod +x "$TEST_DIR/repo11/bin/codex" @@ -307,7 +313,7 @@ mkdir -p "$TEST_DIR/repo12" init_basic_git_repo "$TEST_DIR/repo12" mkdir -p "$TEST_DIR/repo12/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo12/bin/codex" chmod +x "$TEST_DIR/repo12/bin/codex" @@ -328,7 +334,7 @@ init_basic_git_repo "$TEST_DIR/repo13" create_minimal_plan "$TEST_DIR/repo13" mkdir -p "$TEST_DIR/repo13/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo13/bin/codex" chmod +x "$TEST_DIR/repo13/bin/codex" @@ -357,7 +363,7 @@ echo "plan.md" >> "$TEST_DIR/repo14/.gitignore" git -C "$TEST_DIR/repo14" add .gitignore && git -C "$TEST_DIR/repo14" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo14/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo14/bin/codex" chmod +x 
"$TEST_DIR/repo14/bin/codex" @@ -389,7 +395,7 @@ echo "plan.md" >> "$TEST_DIR/repo15/.gitignore" git -C "$TEST_DIR/repo15" add .gitignore && git -C "$TEST_DIR/repo15" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo15/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo15/bin/codex" chmod +x "$TEST_DIR/repo15/bin/codex" @@ -433,7 +439,7 @@ git -C "$TEST_DIR/repo16b" add .gitignore && git -C "$TEST_DIR/repo16b" commit - touch "$TEST_DIR/repo16b/.humanizeconfig" mkdir -p "$TEST_DIR/repo16b/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo16b/bin/codex" chmod +x "$TEST_DIR/repo16b/bin/codex" @@ -473,7 +479,7 @@ create_minimal_plan "$TEST_DIR/repo18" git -C "$TEST_DIR/repo18" add plan.md && git -C "$TEST_DIR/repo18" commit -q -m "Add plan" mkdir -p "$TEST_DIR/repo18/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo18/bin/codex" chmod +x "$TEST_DIR/repo18/bin/codex" @@ -485,68 +491,6 @@ else fail "Tracked plan without flag" "rejection" "exit=$EXIT_CODE" fi -# ======================================== -# Setup PR Loop Tests -# ======================================== - -echo "" -echo "--- Setup PR Loop Argument Tests ---" -echo "" - -# Test 19: Help flag displays usage -echo "Test 19: PR loop help flag displays usage" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --help 2>&1) || true -if echo "$OUTPUT" | grep -q "USAGE\|start-pr-loop"; then - pass "PR loop help flag displays usage" -else - fail "PR loop help" "USAGE text" "no usage found" -fi - -# Test 20: Missing bot flag shows error -echo "" -echo "Test 20: PR loop missing bot flag shows error" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "at least one bot flag"; then - pass "PR loop missing bot flag shows error" -else - fail "Missing bot flag" "error message" "exit=$EXIT_CODE" -fi - -# Test 21: Unknown option rejected -echo "" -echo "Test 21: PR loop unknown option rejected" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --unknown-option 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "unknown option"; then - pass "PR loop unknown option rejected" -else - fail "PR loop unknown option" "rejection" "exit=$EXIT_CODE" -fi - -# Test 22: --max with non-numeric value rejected -echo "" -echo "Test 22: PR loop --max with non-numeric value rejected" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --claude --max abc 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "positive integer"; then - pass "PR loop --max non-numeric rejected" -else - fail "PR loop --max validation" "rejection" "exit=$EXIT_CODE" -fi - -# Test 23: Non-git directory rejected -echo "" -echo "Test 23: PR loop non-git directory rejected" -mkdir -p "$TEST_DIR/pr-nongit" -OUTPUT=$(run_pr_setup "$TEST_DIR/pr-nongit" --claude 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "git repository"; then - pass "PR loop non-git directory rejected" -else - fail "PR loop non-git" "rejection" "exit=$EXIT_CODE" -fi - # ======================================== # Mutual Exclusion Tests # ======================================== @@ -573,7 +517,7 @@ max_iterations: 42 EOF mkdir -p "$TEST_DIR/repo24/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo24/bin/codex" chmod +x "$TEST_DIR/repo24/bin/codex" @@ -585,38 +529,6 @@ else fail "RLCR mutual exclusion" "rejection" "exit=$EXIT_CODE" fi -# Test 25: PR loop blocks starting RLCR loop -echo "" -echo "Test 25: Active PR loop blocks new RLCR loop" -mkdir -p "$TEST_DIR/repo25" -init_basic_git_repo "$TEST_DIR/repo25" -create_minimal_plan "$TEST_DIR/repo25" -echo "plan.md" >> "$TEST_DIR/repo25/.gitignore" -git -C "$TEST_DIR/repo25" add .gitignore && git -C "$TEST_DIR/repo25" commit -q -m "Add gitignore" - -# Create fake active PR loop -mkdir -p "$TEST_DIR/repo25/.humanize/pr-loop/2026-01-19_00-00-00" -cat > "$TEST_DIR/repo25/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - -mkdir -p "$TEST_DIR/repo25/bin" -echo '#!/bin/bash -exit 0' > "$TEST_DIR/repo25/bin/codex" -chmod +x "$TEST_DIR/repo25/bin/codex" - -OUTPUT=$(PATH="$TEST_DIR/repo25/bin:$PATH" run_rlcr_setup "$TEST_DIR/repo25" plan.md 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "pr loop.*already active\|already active"; then - pass "Active PR loop blocks new RLCR loop" -else - fail "PR loop blocks RLCR" "rejection" "exit=$EXIT_CODE" -fi - # ======================================== # Symlink Protection Tests # ======================================== @@ -636,7 +548,7 @@ echo "symlink-plan.md" >> "$TEST_DIR/repo26/.gitignore" git -C "$TEST_DIR/repo26" add .gitignore && git -C "$TEST_DIR/repo26" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo26/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo26/bin/codex" chmod +x "$TEST_DIR/repo26/bin/codex" @@ -664,7 +576,7 @@ echo "symlink-dir" >> "$TEST_DIR/repo27/.gitignore" git -C "$TEST_DIR/repo27" add .gitignore && git -C "$TEST_DIR/repo27" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo27/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo27/bin/codex" chmod +x "$TEST_DIR/repo27/bin/codex" @@ -734,33 +646,6 @@ else pass "Valid numeric arguments accepted (--max 10, --codex-timeout 3600)" fi -# Test 30: Valid PR loop setup proceeds past argument validation -echo "" -echo "Test 30: Valid PR loop setup proceeds past argument validation" -mkdir -p "$TEST_DIR/repo30" -init_basic_git_repo "$TEST_DIR/repo30" - -# Create mock gh that fails auth check (to test dependency handling) -mkdir -p "$TEST_DIR/repo30/bin" -cat > "$TEST_DIR/repo30/bin/gh" << 'EOF' -#!/bin/bash -if [[ "$1" == "auth" && "$2" == "status" ]]; then - echo "Not logged in" >&2 - exit 1 -fi -exit 0 -EOF -chmod +x "$TEST_DIR/repo30/bin/gh" - -OUTPUT=$(PATH="$TEST_DIR/repo30/bin:$PATH" run_pr_setup "$TEST_DIR/repo30" --claude 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -# Should fail at gh auth check, not argument parsing -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "gh\|auth\|logged"; then - pass "Valid PR loop setup proceeds to gh auth check" -else - fail "Valid PR loop setup" "fail at gh auth check" "exit=$EXIT_CODE" -fi - # ======================================== # Timeout Scenario Tests # ======================================== @@ -787,20 +672,6 @@ else pass "--codex-timeout 0 accepted (non-negative integer validation)" fi -# Test 32: --codex-timeout with non-numeric value rejected (PR loop) -echo "" -echo "Test 32: PR loop --codex-timeout with non-numeric value rejected" -mkdir -p "$TEST_DIR/repo32" -init_basic_git_repo "$TEST_DIR/repo32" -mkdir -p "$TEST_DIR/repo32/bin" -OUTPUT=$(PATH="$TEST_DIR/repo32/bin:$PATH" run_pr_setup "$TEST_DIR/repo32" --claude --codex-timeout "abc" 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "positive integer"; then - pass "PR loop --codex-timeout non-numeric rejected" -else - fail "PR loop --codex-timeout non-numeric" "rejection with 'positive integer'" "exit=$EXIT_CODE, output=$OUTPUT" -fi - # Test 33: Very large timeout value accepted echo "" echo "Test 33: Very large timeout value accepted" @@ -839,7 +710,7 @@ REAL_GIT=$(command -v git) # Mock timeout that returns 124 for git rev-parse (first check in setup script) cat > "$TEST_DIR/repo34/bin/timeout" << TIMEOUTEOF -#!/bin/bash +#!/usr/bin/env bash # Mock timeout that returns 124 for git rev-parse to simulate timeout if [[ "\$*" == *"git"*"rev-parse"* ]]; then exit 124 @@ -856,7 +727,7 @@ chmod +x "$TEST_DIR/repo34/bin/gtimeout" # Create mock codex cat > "$TEST_DIR/repo34/bin/codex" << 'CODEXEOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 CODEXEOF chmod +x "$TEST_DIR/repo34/bin/codex" @@ -1023,7 +894,7 @@ git -C "$TEST_DIR/repo42" add .gitignore && git -C "$TEST_DIR/repo42" commit -q # Create mock codex mkdir -p "$TEST_DIR/repo42/bin" -echo 
'#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo42/bin/codex" chmod +x "$TEST_DIR/repo42/bin/codex" @@ -1085,6 +956,32 @@ else fail "--skip-impl summary scaffold" "round-0-summary.md exists" "not found" fi +# Test 44c: --skip-impl creates round-0-contract.md +echo "" +echo "Test 44c: --skip-impl creates round-0-contract.md" +if [[ -n "$LOOP_DIR" ]] && [[ -f "$LOOP_DIR/round-0-contract.md" ]]; then + if grep -qi "Mainline Objective" "$LOOP_DIR/round-0-contract.md"; then + pass "--skip-impl creates round-0-contract.md with mainline objective" + else + fail "--skip-impl round contract content" "Mainline Objective text" "$(cat "$LOOP_DIR/round-0-contract.md")" + fi +else + fail "--skip-impl round contract" "round-0-contract.md exists" "not found" +fi + +# Test 44d: --skip-impl prompt references the round contract +echo "" +echo "Test 44d: --skip-impl prompt references round-0-contract.md" +if [[ -n "$LOOP_DIR" ]] && [[ -f "$LOOP_DIR/round-0-prompt.md" ]]; then + if grep -q "round-0-contract.md" "$LOOP_DIR/round-0-prompt.md"; then + pass "--skip-impl prompt references round-0-contract.md" + else + fail "--skip-impl prompt contract reference" "prompt mentions round-0-contract.md" "$(cat "$LOOP_DIR/round-0-prompt.md")" + fi +else + fail "--skip-impl prompt contract reference" "round-0-prompt.md exists" "not found" +fi + # Test 45: --skip-impl with plan file still works echo "" echo "Test 45: --skip-impl with plan file still works" @@ -1095,7 +992,7 @@ printf 'plan.md\nbin/\n' >> "$TEST_DIR/repo45/.gitignore" git -C "$TEST_DIR/repo45" add .gitignore && git -C "$TEST_DIR/repo45" commit -q -m "Add gitignore" mkdir -p "$TEST_DIR/repo45/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo45/bin/codex" chmod +x "$TEST_DIR/repo45/bin/codex" @@ -1113,6 +1010,44 @@ else fi fi +LOOP_DIR_45=$(find "$TEST_DIR/repo45/.humanize/rlcr" -maxdepth 1 -type d -name "20*" 2>/dev/null | head -1) + +echo "" +echo "Test 45b: --skip-impl with plan file 
preserves plan goal in goal-tracker" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/goal-tracker.md" ]]; then + if grep -q "Test the setup script robustness" "$LOOP_DIR_45/goal-tracker.md"; then + pass "--skip-impl with plan preserves plan goal anchor" + else + fail "--skip-impl plan goal anchor" "goal-tracker contains plan goal" "$(cat "$LOOP_DIR_45/goal-tracker.md")" + fi +else + fail "--skip-impl plan goal anchor" "goal-tracker.md exists" "not found" +fi + +echo "" +echo "Test 45c: --skip-impl with plan file prompt references original plan" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/round-0-prompt.md" ]]; then + if grep -q "@plan.md" "$LOOP_DIR_45/round-0-prompt.md"; then + pass "--skip-impl with plan prompt references original plan" + else + fail "--skip-impl plan prompt anchor" "round-0-prompt references @plan.md" "$(cat "$LOOP_DIR_45/round-0-prompt.md")" + fi +else + fail "--skip-impl plan prompt anchor" "round-0-prompt.md exists" "not found" +fi + +echo "" +echo "Test 45d: --skip-impl with plan file contract references original plan alignment" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/round-0-contract.md" ]]; then + if grep -qi "aligned with @plan.md" "$LOOP_DIR_45/round-0-contract.md"; then + pass "--skip-impl with plan contract references original plan" + else + fail "--skip-impl plan contract anchor" "round-0-contract references @plan.md" "$(cat "$LOOP_DIR_45/round-0-contract.md")" + fi +else + fail "--skip-impl plan contract anchor" "round-0-contract.md exists" "not found" +fi + # ======================================== # Dependency Check Tests # ======================================== @@ -1131,13 +1066,14 @@ git -C "$TEST_DIR/repo46" add .gitignore && git -C "$TEST_DIR/repo46" commit -q # Create bin dir with jq but no codex mkdir -p "$TEST_DIR/repo46/bin" +prepare_runtime_bin "$TEST_DIR/repo46/bin" cat > "$TEST_DIR/repo46/bin/jq" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$TEST_DIR/repo46/bin/jq" -# Hide 
system codex by making the only codex on PATH our empty bin dir -OUTPUT=$(PATH="$TEST_DIR/repo46/bin:/usr/bin:/bin" run_rlcr_setup "$TEST_DIR/repo46" plan.md 2>&1) || EXIT_CODE=$? +# Hide system codex by making the only codex on PATH our test bin dir +OUTPUT=$(PATH="$TEST_DIR/repo46/bin" run_rlcr_setup "$TEST_DIR/repo46" plan.md 2>&1) || EXIT_CODE=$? EXIT_CODE=${EXIT_CODE:-0} if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "Missing required dependencies" && echo "$OUTPUT" | grep -q "codex"; then pass "Missing codex detected in dependency check" @@ -1160,13 +1096,14 @@ git -C "$TEST_DIR/repo47" add .gitignore && git -C "$TEST_DIR/repo47" commit -q # Create bin dir with codex but no jq mkdir -p "$TEST_DIR/repo47/bin" +prepare_runtime_bin "$TEST_DIR/repo47/bin" cat > "$TEST_DIR/repo47/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$TEST_DIR/repo47/bin/codex" -# Use a restricted PATH that has git but no jq -OUTPUT=$(PATH="$TEST_DIR/repo47/bin:/usr/bin:/bin" run_rlcr_setup "$TEST_DIR/repo47" plan.md 2>&1) || EXIT_CODE=$? +# Use a restricted PATH with required runtime tools but no jq +OUTPUT=$(PATH="$TEST_DIR/repo47/bin" run_rlcr_setup "$TEST_DIR/repo47" plan.md 2>&1) || EXIT_CODE=$? 
EXIT_CODE=${EXIT_CODE:-0} if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "Missing required dependencies" && echo "$OUTPUT" | grep -q "jq"; then pass "Missing jq detected in dependency check" @@ -1231,10 +1168,10 @@ git -C "$TEST_DIR/repo49" add .gitignore && git -C "$TEST_DIR/repo49" commit -q # Create mock codex and jq mkdir -p "$TEST_DIR/repo49/bin" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo49/bin/codex" chmod +x "$TEST_DIR/repo49/bin/codex" -echo '#!/bin/bash +echo '#!/usr/bin/env bash exit 0' > "$TEST_DIR/repo49/bin/jq" chmod +x "$TEST_DIR/repo49/bin/jq" diff --git a/tests/robustness/test-state-file-robustness.sh b/tests/robustness/test-state-file-robustness.sh index ae6d0e7f..87cdfe41 100755 --- a/tests/robustness/test-state-file-robustness.sh +++ b/tests/robustness/test-state-file-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for state file parsing # @@ -36,7 +36,7 @@ cat > "$TEST_DIR/state.md" << 'EOF' --- current_round: 5 max_iterations: 10 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false @@ -79,7 +79,7 @@ max_iterations: 20 extra_field: some_value another_extra: 12345 custom_metadata: true -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 --- @@ -404,7 +404,7 @@ cat > "$TEST_DIR/state-full-review.md" << 'EOF' current_round: 3 max_iterations: 20 full_review_round: 7 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high plan_file: plan.md plan_tracked: false @@ -429,7 +429,7 @@ cat > "$TEST_DIR/state-no-full-review.md" << 'EOF' --- current_round: 2 max_iterations: 15 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high plan_file: plan.md plan_tracked: false @@ -455,7 +455,7 @@ cat > "$TEST_DIR/state-min-full-review.md" << 'EOF' current_round: 1 max_iterations: 10 full_review_round: 2 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high plan_file: plan.md 
plan_tracked: false @@ -473,6 +473,55 @@ else fail "Parses state with min full_review_round" "return 0" "returned non-zero" fi +# Test 22: State file with drift-tracking fields +echo "" +echo "Test 22: State file with drift-tracking fields" +cat > "$TEST_DIR/state-drift-fields.md" << 'EOF' +--- +current_round: 4 +max_iterations: 12 +review_started: false +base_branch: main +mainline_stall_count: 2 +last_mainline_verdict: stalled +drift_status: replan_required +--- +EOF + +if parse_state_file "$TEST_DIR/state-drift-fields.md"; then + if [[ "$STATE_MAINLINE_STALL_COUNT" == "2" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "Parses drift-tracking fields correctly" + else + fail "Parses drift-tracking fields" "stall=2 verdict=stalled drift=replan_required" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" + fi +else + fail "Parses state with drift-tracking fields" "return 0" "returned non-zero" +fi + +# Test 23: Missing drift-tracking fields use safe defaults +echo "" +echo "Test 23: Missing drift-tracking fields use safe defaults" +cat > "$TEST_DIR/state-no-drift-fields.md" << 'EOF' +--- +current_round: 1 +max_iterations: 8 +review_started: false +base_branch: main +--- +EOF + +if parse_state_file "$TEST_DIR/state-no-drift-fields.md"; then + if [[ "$STATE_MAINLINE_STALL_COUNT" == "0" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "unknown" ]] && [[ "$STATE_DRIFT_STATUS" == "normal" ]]; then + pass "Uses safe defaults for drift-tracking fields" + else + fail "Default drift-tracking fields" "stall=0 verdict=unknown drift=normal" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" + fi +else + fail "Parses state without drift-tracking fields" "return 0" "returned non-zero" +fi + # ======================================== # Summary # ======================================== diff --git 
a/tests/robustness/test-state-transition-robustness.sh b/tests/robustness/test-state-transition-robustness.sh index 96eeb7da..6f6bdc62 100755 --- a/tests/robustness/test-state-transition-robustness.sh +++ b/tests/robustness/test-state-transition-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for state transition logic # diff --git a/tests/robustness/test-template-error-robustness.sh b/tests/robustness/test-template-error-robustness.sh index ccb3faac..7d9ae47a 100755 --- a/tests/robustness/test-template-error-robustness.sh +++ b/tests/robustness/test-template-error-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for template system error handling # diff --git a/tests/robustness/test-template-stress-robustness.sh b/tests/robustness/test-template-stress-robustness.sh index b599903b..0719c4ef 100755 --- a/tests/robustness/test-template-stress-robustness.sh +++ b/tests/robustness/test-template-stress-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for template system stress conditions # diff --git a/tests/robustness/test-timeout-robustness.sh b/tests/robustness/test-timeout-robustness.sh index 7d9276c9..03de1b92 100755 --- a/tests/robustness/test-timeout-robustness.sh +++ b/tests/robustness/test-timeout-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for timeout implementation # diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index cd3fb58a..00373b45 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Run all test suites for the Humanize plugin (parallel execution) # @@ -68,6 +68,7 @@ TEST_SUITES=( "test-templates-comprehensive.sh" "test-plan-file-hooks.sh" "test-stop-hook-legacy-compat.sh" + "test-stop-hook-bg-allow.sh" "test-error-scenarios.sh" "test-ansi-parsing.sh" "test-allowlist-validators.sh" @@ -84,11 +85,9 @@ TEST_SUITES=( 
"test-task-tag-routing.sh" "test-config-merge.sh" "test-config-error-handling.sh" + "test-codex-hook-install.sh" "test-unified-codex-config.sh" - "test-pr-loop-1-scripts.sh" - "test-pr-loop-2-hooks.sh" - "test-pr-loop-3-stophook.sh" - "test-pr-loop-system.sh" + "test-disable-nested-codex-hooks.sh" # Session ID and Agent Teams tests "test-session-id.sh" "test-agent-teams.sh" @@ -117,8 +116,6 @@ TEST_SUITES=( "robustness/test-hook-system-robustness.sh" "robustness/test-template-error-robustness.sh" "robustness/test-state-transition-robustness.sh" - "robustness/test-pr-loop-api-fetch.sh" - "robustness/test-pr-loop-api-poll.sh" ) # Tests that must be run with zsh (not bash) @@ -136,7 +133,7 @@ trap "rm -rf $OUTPUT_DIR" EXIT if ! command -v codex &>/dev/null; then mkdir -p "$OUTPUT_DIR/mock-bin" cat > "$OUTPUT_DIR/mock-bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_CODEX chmod +x "$OUTPUT_DIR/mock-bin/codex" diff --git a/tests/setup-fixture-mock-gh.sh b/tests/setup-fixture-mock-gh.sh deleted file mode 100755 index eac3a8e3..00000000 --- a/tests/setup-fixture-mock-gh.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# -# Create a mock gh CLI that returns fixture data for testing -# fetch-pr-comments.sh and poll-pr-reviews.sh -# -# Usage: ./setup-fixture-mock-gh.sh <mock_bin_dir> <fixtures_dir> -# -# The mock gh will: -# - Return fixture data for /issues/*/comments, /pulls/*/comments, /pulls/*/reviews -# - Return testuser for gh api user -# - Return testowner/testrepo for gh repo view -# - -set -euo pipefail - -MOCK_BIN_DIR="${1:-}" -FIXTURES_DIR="${2:-}" - -if [[ -z "$MOCK_BIN_DIR" || -z "$FIXTURES_DIR" ]]; then - echo "Usage: $0 <mock_bin_dir> <fixtures_dir>" >&2 - exit 1 -fi - -mkdir -p "$MOCK_BIN_DIR" - -# Create mock gh that returns fixtures -cat > "$MOCK_BIN_DIR/gh" << MOCK_GH_EOF -#!/bin/bash -# Fixture-backed mock gh CLI for testing fetch/poll scripts - -FIXTURES_DIR="$FIXTURES_DIR" - -case "\$1" in - auth) - if [[ "\$2" == "status" ]]; 
then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"owner,name"* ]] || [[ "\$*" == *"owner"* && "\$*" == *"name"* ]]; then - echo '{"owner": {"login": "testowner"}, "name": "testrepo"}' - elif [[ "\$*" == *"parent"* ]]; then - echo '{"parent": null}' - elif [[ "\$*" == *"owner"* ]]; then - echo '{"owner": {"login": "testowner"}}' - elif [[ "\$*" == *"name"* ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"number"* ]]; then - echo '{"number": 123}' - elif [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - fi - exit 0 - fi - ;; - api) - # Handle user endpoint - if [[ "\$2" == "user" ]]; then - echo '{"login": "testuser"}' - exit 0 - fi - - # Handle issue comments endpoint - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - cat "\$FIXTURES_DIR/issue_comments.json" - exit 0 - fi - - # Handle PR review comments endpoint (inline comments) - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - cat "\$FIXTURES_DIR/review_comments.json" - exit 0 - fi - - # Handle PR reviews endpoint - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - cat "\$FIXTURES_DIR/pr_reviews.json" - exit 0 - fi - - # Default: return empty array - echo "[]" - exit 0 - ;; -esac - -echo "Mock gh: unhandled command: \$*" >&2 -exit 1 -MOCK_GH_EOF - -chmod +x "$MOCK_BIN_DIR/gh" - -echo "$MOCK_BIN_DIR" diff --git a/tests/setup-monitor-test-env.sh b/tests/setup-monitor-test-env.sh index 14673c6c..977d1b53 100755 --- a/tests/setup-monitor-test-env.sh +++ b/tests/setup-monitor-test-env.sh @@ -1,8 +1,8 @@ -#!/bin/bash +#!/usr/bin/env bash # # Helper script to set up monitor test environment # This script creates the necessary directory structure and state files -# for testing the monitor pr command. +# for testing the monitor command. 
# # Usage: ./setup-monitor-test-env.sh <test_dir> <test_name> # @@ -18,72 +18,9 @@ if [[ -z "$TEST_DIR" ]]; then fi case "$TEST_NAME" in - yaml_list) - # Test: active_bots with YAML list format - TIMESTAMP="2026-01-18_16-00-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 456 -start_branch: feature-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; - configured) - # Test: configured_bots vs active_bots (partial approval) - TIMESTAMP="2026-01-18_16-01-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 2 -max_iterations: 42 -pr_number: 789 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; - empty) - # Test: empty active_bots (all approved) - TIMESTAMP="2026-01-18_16-02-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 3 -max_iterations: 42 -pr_number: 999 -start_branch: approved-branch -configured_bots: - - claude - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; *) echo "Unknown test name: $TEST_NAME" >&2 - echo "Available: yaml_list, configured, empty" >&2 + echo "Available: (none currently)" >&2 exit 1 ;; esac diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 27285561..ef67073d 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for --agent-teams feature in RLCR loop # @@ -474,7 +474,7 @@ GI_EOF --- 
current_round: $round max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false @@ -488,6 +488,9 @@ ask_codex_question: false full_review_round: 5 session_id: agent_teams: $agent_teams +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal --- STATE_EOF @@ -516,6 +519,16 @@ GT_EOF Implemented features as requested. SUM_EOF + cat > "$LOOP_DIR/round-${round}-contract.md" << CONTRACT_EOF +# Round $round Contract + +- Mainline Objective: Continue the requested implementation round +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: advance the mainline objective without drift +CONTRACT_EOF + # Set up isolated cache directory export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" @@ -531,12 +544,19 @@ setup_mock_codex_impl_feedback() { local feedback="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF -#!/bin/bash -if [[ "\$1" == "exec" ]]; then +#!/usr/bin/env bash +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $feedback REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then echo "No issues found." 
fi MOCK_EOF @@ -549,10 +569,17 @@ setup_mock_codex_review_issues() { local review_output="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF -#!/bin/bash -if [[ "\$1" == "exec" ]]; then +#!/usr/bin/env bash +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then echo "Should not be called in review phase" -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then cat << 'REVIEWOUT' $review_output REVIEWOUT @@ -569,6 +596,8 @@ MOCK_EOF setup_stophook_test 3 "true" "false" setup_mock_codex_impl_feedback "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues found: - Issue 1: Missing error handling @@ -599,6 +628,46 @@ else fail "impl phase with agent_teams=true: next-round prompt contains agent-teams continuation" "round-4-prompt.md exists" "not found (hook exit=$HOOK_EXIT)" fi +# ======================================== +# Test: Drift recovery prompt still preserves agent-teams continuation +# ======================================== + +setup_stophook_test 3 "true" "false" +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" +setup_mock_codex_impl_feedback "## Review Feedback + +Mainline Progress Verdict: STALLED + +- Mainline gap: AC-1 still has no stable implementation +- Blocking side issue: the team is repeating the same non-advancing fix pattern + +Recover the mainline before trying again. + +CONTINUE" + +HOOK_INPUT='{"stop_hook_active": false, "transcript": [], "session_id": ""}' +set +e +RESULT=$(echo "$HOOK_INPUT" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$STOP_HOOK" 2>/dev/null) +HOOK_EXIT=$? 
+set -e + +NEXT_PROMPT="$LOOP_DIR/round-4-prompt.md" +if [[ -f "$NEXT_PROMPT" ]]; then + if grep -q "Drift Recovery Mode" "$NEXT_PROMPT"; then + pass "drift recovery prompt generated for stalled mainline" + else + fail "drift recovery prompt generated for stalled mainline" "Drift Recovery Mode" "not found" + fi + if grep -qi "Agent Teams" "$NEXT_PROMPT"; then + pass "drift recovery prompt keeps agent-teams continuation" + else + fail "drift recovery prompt keeps agent-teams continuation" "agent-teams text in prompt" "not found" + fi +else + fail "drift recovery prompt keeps agent-teams continuation" "round-4-prompt.md exists" "not found (hook exit=$HOOK_EXIT)" +fi + # ======================================== # Test: Implementation phase with agent_teams=false has no continuation # ======================================== @@ -606,6 +675,8 @@ fi setup_stophook_test 3 "false" "false" setup_mock_codex_impl_feedback "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues found: - Issue 1: Missing error handling diff --git a/tests/test-allowlist-validators.sh b/tests/test-allowlist-validators.sh index ebc4b844..fc5c2c98 100755 --- a/tests/test-allowlist-validators.sh +++ b/tests/test-allowlist-validators.sh @@ -1,12 +1,12 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for allowlist behavior in RLCR loop validators # # Tests: # - is_allowlisted_file() function in loop-common.sh -# - Read validator allowlist for todos and summaries -# - Write validator allowlist for todos and summaries -# - Edit validator allowlist for todos and summaries +# - Read validator allowlist for todos, summaries, and contracts +# - Write validator allowlist for todos, summaries, and contracts +# - Edit validator allowlist for todos, summaries, and contracts # - Bash validator allowlist for todos files (path-restricted) # @@ -117,6 +117,14 @@ else fail "round-2-summary.md blocked" "false" "true" fi +# Test 6b: Non-allowlisted file - round-0-contract.md +echo "Test 6b: 
round-0-contract.md is NOT allowlisted" +if ! is_allowlisted_file "$ACTIVE_LOOP_DIR/round-0-contract.md" "$ACTIVE_LOOP_DIR"; then + pass "round-0-contract.md is NOT allowlisted" +else + fail "round-0-contract.md blocked" "false" "true" +fi + # Test 7: Wrong directory - allowlisted filename but wrong path echo "Test 7: round-1-todos.md in wrong directory is NOT allowlisted" if ! is_allowlisted_file "/other/path/round-1-todos.md" "$ACTIVE_LOOP_DIR"; then @@ -158,6 +166,19 @@ else fail "Write validator round-0-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 9b: Write validator allows current round contract +echo "Test 9b: Write validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write validator allows round-5-contract.md" +else + fail "Write validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 10: Write validator blocks round-3-todos.md (not in allowlist) echo "Test 10: Write validator blocks round-3-todos.md" HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-todos.md"}}' @@ -184,6 +205,19 @@ else fail "Write validator round-2-summary.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" fi +# Test 11b: Write validator blocks stale round contract +echo "Test 11b: Write validator blocks round-3-contract.md" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Write validator blocks round-3-contract.md" +else + fail "Write validator round-3-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "" echo "=== Test: Edit Validator Allowlist ===" echo "" @@ -214,6 +248,32 @@ else fail "Edit validator round-1-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 13b: Edit validator allows current round contract +echo "Test 13b: Edit validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit validator allows round-5-contract.md" +else + fail "Edit validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 13c: Edit validator blocks stale round contract +echo "Test 13c: Edit validator blocks round-0-contract.md" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-0-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Edit validator blocks round-0-contract.md" +else + fail "Edit validator round-0-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 14: Edit validator blocks round-4-todos.md echo "Test 14: Edit validator blocks round-4-todos.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-4-todos.md"}}' @@ -257,6 +317,19 @@ else fail "Read validator round-0-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 16b: Read validator allows current round contract +echo "Test 16b: Read validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Read validator allows round-5-contract.md" +else + fail "Read validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 17: Read validator blocks round-3-todos.md echo "Test 17: Read validator blocks round-3-todos.md" HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-todos.md"}}' @@ -283,6 +356,19 @@ else fail "Read validator round-3-summary.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" fi +# Test 18b: Read validator blocks stale round contract +echo "Test 18b: Read validator blocks round-3-contract.md" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Read validator blocks round-3-contract.md" +else + fail "Read validator round-3-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "" echo "=== Test: Bash Validator Allowlist (Path-Restricted) ===" echo "" @@ -313,6 +399,19 @@ else fail "Bash validator round-2-todos.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 20b: Bash validator blocks round-5-contract.md +echo "Test 20b: Bash validator blocks round-5-contract.md" +HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > '$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Bash validator blocks round-5-contract.md" +else + fail "Bash validator round-5-contract.md" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 21: Bash validator blocks round-1-todos.md in wrong directory echo "Test 21: Bash validator blocks round-1-todos.md in wrong directory" HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > /tmp/round-1-todos.md"}}' diff --git a/tests/test-ansi-parsing.sh b/tests/test-ansi-parsing.sh index 4394a8d7..8f70847f 100755 --- a/tests/test-ansi-parsing.sh +++ b/tests/test-ansi-parsing.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test ANSI escape code handling in test runner output parsing # diff --git a/tests/test-ask-codex.sh b/tests/test-ask-codex.sh index 620cd613..896f282a 100755 --- a/tests/test-ask-codex.sh +++ b/tests/test-ask-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for ask-codex.sh - one-shot consultation with mock Codex # @@ -37,7 +37,7 @@ MOCK_BIN_DIR="$TEST_DIR/mock-bin" mkdir -p "$MOCK_BIN_DIR" cat > "$MOCK_BIN_DIR/codex" << 'MOCK_EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex binary for 
testing ask-codex.sh # Controlled via environment variables. if [[ -n "${MOCK_CODEX_STDERR:-}" ]]; then diff --git a/tests/test-bash-validator-patterns.sh b/tests/test-bash-validator-patterns.sh index 62a491f7..bd7f07f4 100755 --- a/tests/test-bash-validator-patterns.sh +++ b/tests/test-bash-validator-patterns.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for command_modifies_file function in loop-common.sh # diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index d3c205c3..68ecfa13 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Tests for bitlesson-select.sh provider routing set -euo pipefail @@ -26,12 +26,50 @@ create_mock_bitlesson() { EOF } +create_real_bitlesson() { + local dir="$1" + mkdir -p "$dir" + cat > "$dir/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +## Lesson: Avoid tracker drift +Lesson ID: BL-20260315-tracker-drift +Scope: goal-tracker.md +Problem Description: Tracker diverges from actual task status. +Root Cause: Status rows are not updated after verification. +Solution: Update tracker rows immediately after each verification step. +Constraints: Keep tracker edits minimal. +Validation Evidence: Verified in test fixture. +Source Rounds: 0 +EOF +} + +create_real_humanize_bitlesson() { + local dir="$1" + mkdir -p "$dir/.humanize" + cat > "$dir/.humanize/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +## Lesson: Avoid tracker drift +Lesson ID: BL-20260315-tracker-drift +Scope: goal-tracker.md +Problem Description: Tracker diverges from actual task status. +Root Cause: Status rows are not updated after verification. +Solution: Update tracker rows immediately after each verification step. +Constraints: Keep tracker edits minimal. +Validation Evidence: Verified in test fixture. 
+Source Rounds: 0 +EOF +} + # Helper: create a mock codex binary that outputs valid bitlesson-selector format create_mock_codex() { local bin_dir="$1" mkdir -p "$bin_dir" cat > "$bin_dir/codex" <<'EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex that only reads prompt content from stdin when invoked with trailing '-' if [[ "${*: -1}" != "-" ]]; then echo "mock codex expected trailing '-' to read prompt from stdin" >&2 @@ -58,7 +96,7 @@ create_recording_mock_codex() { local stdin_file="$2" mkdir -p "$bin_dir" cat > "$bin_dir/codex" <<EOF -#!/bin/bash +#!/usr/bin/env bash if [[ "\${*: -1}" != "-" ]]; then echo "mock codex expected trailing '-' to read prompt from stdin" >&2 exit 9 @@ -83,7 +121,7 @@ create_mock_claude() { local bin_dir="$1" mkdir -p "$bin_dir" cat > "$bin_dir/claude" <<'EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock claude that outputs valid bitlesson-selector format # Consume stdin so the pipe does not break cat > /dev/null @@ -102,7 +140,7 @@ echo "--- Test 1: gpt-* model routes to codex ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_codex "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -131,7 +169,7 @@ echo "--- Test 1b: gpt-* codex path passes stdin prompt via trailing '-' ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" STDIN_FILE="$TEST_DIR/codex-stdin.txt" create_recording_mock_codex "$BIN_DIR" "$STDIN_FILE" @@ -166,7 +204,7 @@ echo "--- Test 2: haiku model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -195,7 +233,7 @@ echo "--- Test 3: sonnet model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" 
create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -224,7 +262,7 @@ echo "--- Test 4: OPUS (uppercase) model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -253,7 +291,7 @@ echo "--- Test 5: Unknown model exits non-zero with error ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "unknown-xyz-model"}' > "$TEST_DIR/.humanize/config.json" @@ -279,7 +317,7 @@ echo "--- Test 6: gpt-* model with missing codex binary exits non-zero ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "gpt-4o"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub claude but NOT codex. @@ -287,7 +325,7 @@ NO_CODEX_BIN="$TEST_DIR/no-codex-bin" mkdir -p "$NO_CODEX_BIN" # Provide a stub claude so it does not interfere with the codex check cat > "$NO_CODEX_BIN/claude" <<'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$NO_CODEX_BIN/claude" @@ -315,7 +353,7 @@ echo "--- Test 7: haiku model falls back to codex when claude binary is missing echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "haiku"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub codex but NOT claude. @@ -323,7 +361,7 @@ NO_CLAUDE_BIN="$TEST_DIR/no-claude-bin" mkdir -p "$NO_CLAUDE_BIN" # Provide a stub codex that produces valid bitlesson output (proves fallback worked) cat > "$NO_CLAUDE_BIN/codex" <<'MOCK_EOF' -#!/bin/bash +#!/usr/bin/env bash echo "LESSON_IDS: NONE" echo "RATIONALE: No relevant lessons for this task." 
MOCK_EOF @@ -348,4 +386,111 @@ fi # Summary # ======================================== +echo "" +echo "--- Test 8: codex-only provider mode forces codex routing ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "haiku", "codex_model": "gpt-5.5", "provider_mode": "codex-only"}' > "$TEST_DIR/.humanize/config.json" +FALLBACK_BIN="$TEST_DIR/fallback-bin" +create_mock_codex "$FALLBACK_BIN" + +exit_code=0 +stdout_out="" +stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$FALLBACK_BIN:$PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Initialize tracker" \ + --paths "plans/plan.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + +if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "mock codex"; then + pass "codex-only provider mode forces codex routing" +else + fail "codex-only provider mode forces codex routing" "exit=0 + mock codex rationale" "exit=$exit_code, stdout=$stdout_out" +fi + +echo "" +echo "--- Test 9: Placeholder BitLesson file short-circuits to NONE ---" +echo "" + +setup_test_dir +create_mock_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "gpt-5.5"}' > "$TEST_DIR/.humanize/config.json" + +exit_code=0 +stdout_out="" +stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Any task" \ + --paths "README.md" \ + --bitlesson-file "$TEST_DIR/.humanize/bitlesson.md" 2>/dev/null) || exit_code=$? 
+ +if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "LESSON_IDS: NONE" && echo "$stdout_out" | grep -q "no recorded lessons"; then + pass "Placeholder BitLesson file returns NONE without invoking a model" +else + fail "Placeholder BitLesson file returns NONE without invoking a model" "exit=0 + NONE rationale" "exit=$exit_code, stdout=$stdout_out" +fi + +echo "" +echo "--- Test 10: Codex selector disables hooks and avoids full-auto ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "gpt-5.5"}' > "$TEST_DIR/.humanize/config.json" +CAPTURE_BIN="$TEST_DIR/capture-bin" +mkdir -p "$CAPTURE_BIN" +cat > "$CAPTURE_BIN/codex" <<'EOF' +#!/usr/bin/env bash +# Respond to help probes with supported flags +for arg in "$@"; do + if [[ "$arg" == "--help" ]]; then + echo " --disable <feature> Disable a feature" + echo " --skip-git-repo-check Skip git repo check" + echo " --ephemeral Ephemeral mode" + exit 0 + fi +done +printf '%s\n' "$@" > "${TEST_CAPTURE_ARGS:?}" +cat > /dev/null +cat <<'OUT' +LESSON_IDS: BL-20260315-tracker-drift +RATIONALE: The tracker lesson directly matches the task. +OUT +EOF +chmod +x "$CAPTURE_BIN/codex" + +CAPTURE_ARGS="$TEST_DIR/codex-args.txt" +exit_code=0 +stdout_out="" +stdout_out=$(TEST_CAPTURE_ARGS="$CAPTURE_ARGS" CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$CAPTURE_BIN:$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Update the goal tracker after verification" \ + --paths "goal-tracker.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? 
+ +captured_args="$(cat "$CAPTURE_ARGS" 2>/dev/null || true)" # stays empty when codex was never invoked, so fail() reports it instead of set -e aborting the script + +if [[ $exit_code -eq 0 ]] \ + && echo "$stdout_out" | grep -q "BL-20260315-tracker-drift" \ + && echo "$captured_args" | grep -q -- '--disable' \ + && echo "$captured_args" | grep -q -- 'codex_hooks' \ + && echo "$captured_args" | grep -q -- '--skip-git-repo-check' \ + && echo "$captured_args" | grep -q -- '--ephemeral' \ + && echo "$captured_args" | grep -q -- 'read-only' \ + && ! echo "$captured_args" | grep -q -- '--full-auto'; then + pass "Codex selector runs as a direct helper without hooks or full-auto" +else + fail "Codex selector runs as a direct helper without hooks or full-auto" \ + "exit=0 + direct-helper args" \ + "exit=$exit_code, stdout=$stdout_out, args=$captured_args" +fi + print_test_summary "Bitlesson Select Routing Test Summary" diff --git a/tests/test-bitlesson-validate-delta.sh b/tests/test-bitlesson-validate-delta.sh index 276a128f..c63b2128 100755 --- a/tests/test-bitlesson-validate-delta.sh +++ b/tests/test-bitlesson-validate-delta.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for bitlesson-validate-delta.sh validation rules # diff --git a/tests/test-cancel-signal-file.sh b/tests/test-cancel-signal-file.sh index 4d23eef3..420c5d5d 100755 --- a/tests/test-cancel-signal-file.sh +++ b/tests/test-cancel-signal-file.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for cancel-rlcr-loop signal file mechanism # @@ -1342,6 +1342,82 @@ else pass "is_cancel_authorized rejects hidden variables" fi +echo "HELPER TEST 8: is_cancel_authorized accepts symlinked-prefix path" +# Regression test: when the user supplies the active-loop path through a +# symlinked prefix (e.g. /var/... on macOS resolves to /private/var/...), +# the authorization check must canonicalize both sides so it still matches.
+# We simulate the scenario by creating an all-lowercase sibling layout +# (mktemp dirs contain mixed case, which would defeat realpath once the +# command is lowercased on case-sensitive filesystems), then symlinking +# from there back to the real loop dir. +setup_test_loop "helper-8" +touch "$LOOP_DIR/.cancel-requested" + +SYMLINK_ROOT_RAW=$(mktemp -d "${TMPDIR:-/tmp}/humanize-symlink-XXXXXXXX" || true) +# mktemp suffixes are mixed-case; the lowercased name only exists on case-insensitive filesystems, so otherwise drop the raw dir and use a lowercase one. +SYMLINK_ROOT=$(printf '%s' "$SYMLINK_ROOT_RAW" | tr '[:upper:]' '[:lower:]') +[[ -d "$SYMLINK_ROOT" ]] || { rm -rf "$SYMLINK_ROOT_RAW" 2>/dev/null; SYMLINK_ROOT="${TMPDIR:-/tmp}/humanize-symlink-lowercase-$$"; mkdir -p "$SYMLINK_ROOT"; } + +SYMLINK_LOOP_DIR="$SYMLINK_ROOT/via-symlink" +ln -sfn "$LOOP_DIR" "$SYMLINK_LOOP_DIR" + +CANONICAL_LOOP_DIR="$(cd "$LOOP_DIR" && pwd -P)" +COMMAND_LOWER="mv ${SYMLINK_LOOP_DIR}/state.md ${SYMLINK_LOOP_DIR}/cancel-state.md" +COMMAND_LOWER=$(to_lower "$COMMAND_LOWER") + +if is_cancel_authorized "$CANONICAL_LOOP_DIR" "$COMMAND_LOWER"; then + pass "is_cancel_authorized accepts symlinked-prefix path after realpath" +else + fail "helper symlink prefix" "returns 0 (authorized)" "returns non-zero" +fi + +rm -rf "$SYMLINK_ROOT" 2>/dev/null || true + +echo "HELPER TEST 9: is_cancel_authorized rejects destination symlink alias" +# Regression test for a P1 security issue: if the destination argument is a +# symlink that points at <loop>/cancel-state.md, canonicalizing the full +# path (leaf dereferenced) would let the alias pass authorization. `mv` +# would then operate on the link path itself, corrupting loop state and +# leaking state.md contents outside the loop dir. The fix resolves symlinks +# only in the parent directory and preserves the basename verbatim.
+setup_test_loop "helper-9" +touch "$LOOP_DIR/.cancel-requested" +# Create the target file so the symlink would resolve if the prefix-only +# canonicalizer were relaxed back to full canonicalization. +touch "$LOOP_DIR/cancel-state.md" +ln -sfn "$LOOP_DIR/cancel-state.md" "$TEST_DIR/dest-alias" + +COMMAND_LOWER="mv ${LOOP_DIR}/state.md ${TEST_DIR}/dest-alias" +COMMAND_LOWER=$(to_lower "$COMMAND_LOWER") + +if is_cancel_authorized "$LOOP_DIR" "$COMMAND_LOWER"; then + fail "helper dest symlink alias" "returns non-zero (rejected)" "returns 0 (authorized)" +else + pass "is_cancel_authorized rejects destination symlink alias" +fi +rm -f "$TEST_DIR/dest-alias" "$LOOP_DIR/cancel-state.md" + +echo "HELPER TEST 10: is_cancel_authorized rejects source symlink alias" +# Regression test for a P1 security issue: if the source argument is a +# symlink aliasing <loop>/state.md, dereferencing the leaf would let it +# pass authorization. The on-disk symlink check (src_original) below +# would still catch this specific case because it probes the real path, +# but we defend in depth: the path comparison must reject the alias on +# its own. 
+setup_test_loop "helper-10" +touch "$LOOP_DIR/.cancel-requested" +ln -sfn "$LOOP_DIR/state.md" "$TEST_DIR/src-alias" + +COMMAND_LOWER="mv ${TEST_DIR}/src-alias ${LOOP_DIR}/cancel-state.md" +COMMAND_LOWER=$(to_lower "$COMMAND_LOWER") + +if is_cancel_authorized "$LOOP_DIR" "$COMMAND_LOWER"; then + fail "helper src symlink alias" "returns non-zero (rejected)" "returns 0 (authorized)" +else + pass "is_cancel_authorized rejects source symlink alias" +fi +rm -f "$TEST_DIR/src-alias" + # ======================================== # Summary # ======================================== diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh new file mode 100755 index 00000000..da20fb96 --- /dev/null +++ b/tests/test-codex-hook-install.sh @@ -0,0 +1,334 @@ +#!/usr/bin/env bash +# +# Tests for Codex-native hook installation and merge behavior. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +INSTALL_SCRIPT="$PROJECT_ROOT/scripts/install-skill.sh" + +echo "==========================================" +echo "Codex Hook Install Tests" +echo "==========================================" +echo "" + +if [[ ! -x "$INSTALL_SCRIPT" ]]; then + echo "FATAL: install-skill.sh not found at $INSTALL_SCRIPT" >&2 + exit 1 +fi + +if ! 
command -v python3 >/dev/null 2>&1; then + echo "FATAL: python3 is required for this test" >&2 + exit 1 +fi + +setup_test_dir + +FAKE_BIN="$TEST_DIR/bin" +CODEX_HOME_DIR="$TEST_DIR/codex-home" +HOOKS_FILE="$CODEX_HOME_DIR/hooks.json" +FEATURE_LOG="$TEST_DIR/codex-features.log" +XDG_CONFIG_HOME_DIR="$TEST_DIR/xdg-config" +HUMANIZE_USER_CONFIG="$XDG_CONFIG_HOME_DIR/humanize/config.json" +COMMAND_BIN_DIR="$TEST_DIR/command-bin" +mkdir -p "$FAKE_BIN" "$CODEX_HOME_DIR" "$COMMAND_BIN_DIR" + +cat > "$FAKE_BIN/codex" <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +codex_hooks under development false +LIST + exit 0 +fi + +if [[ "${1:-}" == "features" && "${2:-}" == "enable" && "${3:-}" == "codex_hooks" ]]; then + printf 'CODEX_HOME=%s\n' "${CODEX_HOME:-}" >> "${TEST_CODEX_FEATURE_LOG:?}" + mkdir -p "${CODEX_HOME:?}" + : > "${CODEX_HOME}/.codex-hooks-enabled" + exit 0 +fi + +if [[ "${1:-}" == "exec" ]]; then + cat <<'OUT' +LESSON_IDS: NONE +RATIONALE: No matching lessons found (fake codex exec). 
+OUT + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$FAKE_BIN/codex" + +cat > "$HOOKS_FILE" <<'EOF' +{ + "description": "Existing hooks", + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "/custom/session-start.sh", + "timeout": 15 + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "/tmp/old/skills/humanize/hooks/loop-codex-stop-hook.sh", + "timeout": 30 + } + ] + }, + { + "hooks": [ + { + "type": "command", + "command": "/custom/keep-me.sh", + "timeout": 5 + } + ] + } + ] + } +} +EOF + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$CODEX_HOME_DIR" \ + --codex-skills-dir "$CODEX_HOME_DIR/skills" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install.log" 2>&1 + +if [[ -f "$CODEX_HOME_DIR/skills/humanize/SKILL.md" ]]; then + pass "Codex install syncs Humanize skill bundle" +else + fail "Codex install syncs Humanize skill bundle" "skills/humanize/SKILL.md exists" "missing" +fi + +if [[ -f "$CODEX_HOME_DIR/skills/humanize-rlcr/SKILL.md" ]]; then + pass "Codex install keeps humanize-rlcr entrypoint skill" +else + fail "Codex install keeps humanize-rlcr entrypoint skill" "skills/humanize-rlcr/SKILL.md exists" "missing" +fi + +if [[ -f "$HOOKS_FILE" ]]; then + pass "Codex install writes hooks.json" +else + fail "Codex install writes hooks.json" "$HOOKS_FILE exists" "missing" +fi + +if [[ -f "$CODEX_HOME_DIR/.codex-hooks-enabled" ]]; then + pass "Codex install enables codex_hooks feature" +else + fail "Codex install enables codex_hooks feature" ".codex-hooks-enabled marker exists" "missing" +fi + +if [[ -f "$HUMANIZE_USER_CONFIG" ]]; then + pass "Codex install writes Humanize user config" +else + fail "Codex install writes Humanize user config" "$HUMANIZE_USER_CONFIG exists" "missing" +fi + +if [[ -x 
"$COMMAND_BIN_DIR/bitlesson-selector" ]]; then + pass "Codex install writes a PATH-ready bitlesson-selector shim" +else + fail "Codex install writes a PATH-ready bitlesson-selector shim" "$COMMAND_BIN_DIR/bitlesson-selector exists" "missing" +fi + +if [[ "$(jq -r '.bitlesson_model // empty' "$HUMANIZE_USER_CONFIG")" == "gpt-5.5" ]]; then + pass "Codex install seeds bitlesson_model with a Codex/OpenAI model" +else + fail "Codex install seeds bitlesson_model with a Codex/OpenAI model" \ + "gpt-5.5" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +if [[ "$(jq -r '.provider_mode // empty' "$HUMANIZE_USER_CONFIG")" == "codex-only" ]]; then + pass "Codex install marks Humanize user config as codex-only" +else + fail "Codex install marks Humanize user config as codex-only" \ + "codex-only" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +runtime_root="$CODEX_HOME_DIR/skills/humanize" +PY_OUTPUT="$( + python3 - "$HOOKS_FILE" "$runtime_root" <<'PY' +import json +import pathlib +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +runtime_root = sys.argv[2] +data = json.loads(hooks_file.read_text(encoding="utf-8")) + +commands = [] +for group in data["hooks"]["Stop"]: + for hook in group.get("hooks", []): + command = hook.get("command") + if isinstance(command, str): + commands.append(command) + +expected = { + f"{runtime_root}/hooks/loop-codex-stop-hook.sh", +} + +print("FOUND=" + ("1" if expected.issubset(set(commands)) else "0")) +print("KEEP=" + ("1" if "/custom/keep-me.sh" in commands else "0")) +print("OLD=" + ("1" if any("/tmp/old/skills/humanize/hooks/" in cmd for cmd in commands) else "0")) +print("SESSION=" + ("1" if data["hooks"]["SessionStart"][0]["hooks"][0]["command"] == "/custom/session-start.sh" else "0")) +print("COUNT=" + str(sum(1 for cmd in commands if "/humanize/hooks/" in cmd))) +PY +)" + +if grep -q '^FOUND=1$' <<<"$PY_OUTPUT"; then + pass "Codex install adds managed Humanize Stop hook commands" +else 
+ fail "Codex install adds managed Humanize Stop hook commands" "FOUND=1" "$PY_OUTPUT" +fi + +if grep -q '^KEEP=1$' <<<"$PY_OUTPUT"; then + pass "Codex install preserves unrelated Stop hooks" +else + fail "Codex install preserves unrelated Stop hooks" "KEEP=1" "$PY_OUTPUT" +fi + +if grep -q '^OLD=0$' <<<"$PY_OUTPUT"; then + pass "Codex install removes stale Humanize hook commands" +else + fail "Codex install removes stale Humanize hook commands" "OLD=0" "$PY_OUTPUT" +fi + +if grep -q '^SESSION=1$' <<<"$PY_OUTPUT"; then + pass "Codex install preserves SessionStart hooks" +else + fail "Codex install preserves SessionStart hooks" "SESSION=1" "$PY_OUTPUT" +fi + +if grep -q '^COUNT=1$' <<<"$PY_OUTPUT"; then + pass "Codex install writes exactly one managed Humanize Stop hook" +else + fail "Codex install writes exactly one managed Humanize Stop hook" "COUNT=1" "$PY_OUTPUT" +fi + +mkdir -p "$TEST_DIR/project" +cat > "$TEST_DIR/project/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries +<!-- placeholder --> +EOF + +shim_output="$( + CLAUDE_PROJECT_DIR="$TEST_DIR/project" \ + XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + PATH="$COMMAND_BIN_DIR:$FAKE_BIN:$PATH" \ + "$COMMAND_BIN_DIR/bitlesson-selector" \ + --task "Verify the shim dispatches into the installed runtime" \ + --paths "README.md" \ + --bitlesson-file "$TEST_DIR/project/bitlesson.md" +)" + +if grep -q '^LESSON_IDS: NONE$' <<<"$shim_output"; then + pass "bitlesson-selector shim dispatches into installed runtime" +else + fail "bitlesson-selector shim dispatches into installed runtime" "LESSON_IDS: NONE" "$shim_output" +fi + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$CODEX_HOME_DIR" \ + --codex-skills-dir "$CODEX_HOME_DIR/skills" --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install-2.log" 2>&1 # same sandboxed shim dir as the first install, so the re-run presumably cannot write outside $TEST_DIR (installer default not visible here) + +PY_OUTPUT_2="$( + python3 - "$HOOKS_FILE" <<'PY' +import json +import pathlib +import sys + +hooks_file =
pathlib.Path(sys.argv[1]) +data = json.loads(hooks_file.read_text(encoding="utf-8")) + +commands = [] +for group in data["hooks"]["Stop"]: + for hook in group.get("hooks", []): + command = hook.get("command") + if isinstance(command, str): + commands.append(command) + +print(sum(1 for cmd in commands if "/humanize/hooks/" in cmd)) +PY +)" + +if [[ "$PY_OUTPUT_2" == "1" ]]; then + pass "Codex install is idempotent for managed hook commands" +else + fail "Codex install is idempotent for managed hook commands" "1" "$PY_OUTPUT_2" +fi + +if [[ "$(wc -l < "$FEATURE_LOG" | tr -d ' ')" == "2" ]]; then + pass "Codex feature enable runs on each Codex install/update" +else + fail "Codex feature enable runs on each Codex install/update" "2 log entries" "$(cat "$FEATURE_LOG")" +fi + +UNSUPPORTED_BIN="$TEST_DIR/bin-unsupported" +UNSUPPORTED_HOME="$TEST_DIR/codex-home-unsupported" +mkdir -p "$UNSUPPORTED_BIN" "$UNSUPPORTED_HOME" + +cat > "$UNSUPPORTED_BIN/codex" <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +apply_patch_freeform under development false +LIST + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$UNSUPPORTED_BIN/codex" + +set +e +PATH="$UNSUPPORTED_BIN:$PATH" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$UNSUPPORTED_HOME" \ + --codex-skills-dir "$UNSUPPORTED_HOME/skills" \ + > "$TEST_DIR/install-unsupported.log" 2>&1 +UNSUPPORTED_EXIT=$? 
+set -e + +if [[ "$UNSUPPORTED_EXIT" -ne 0 ]]; then + pass "Codex install rejects builds without native hooks support" +else + fail "Codex install rejects builds without native hooks support" "non-zero exit" "exit 0" +fi + +if grep -q "codex_hooks feature" "$TEST_DIR/install-unsupported.log"; then + pass "Unsupported Codex failure explains missing codex_hooks feature" +else + fail "Unsupported Codex failure explains missing codex_hooks feature" \ + "error mentioning codex_hooks feature" \ + "$(cat "$TEST_DIR/install-unsupported.log")" +fi + +print_test_summary "Codex Hook Install Tests" diff --git a/tests/test-codex-review-merge.sh b/tests/test-codex-review-merge.sh index 8761bde7..ed1082bf 100755 --- a/tests/test-codex-review-merge.sh +++ b/tests/test-codex-review-merge.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for Code Review log file analysis behavior # diff --git a/tests/test-commit-history-section.sh b/tests/test-commit-history-section.sh new file mode 100755 index 00000000..7177f5e8 --- /dev/null +++ b/tests/test-commit-history-section.sh @@ -0,0 +1,271 @@ +#!/usr/bin/env bash +# +# Test script for the Integral (I) component: commit-history-section +# +# Validates: +# 1. Round 0: "(no commits yet)" and "(first round, no prior history)" +# 2. Round 2+: commit log and round file references rendered correctly +# 3. Corrupted BASE_COMMIT: graceful fallback with annotation +# 4. Template missing: fallback renders the full section including round files +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +source "$SCRIPT_DIR/test-helpers.sh" +source "$PROJECT_ROOT/hooks/lib/template-loader.sh" + +TEMPLATE_DIR="$PROJECT_ROOT/prompt-template" + +echo "========================================" +echo "Testing commit-history-section (I component)" +echo "========================================" +echo "" + +# ======================================== +# Setup: create a temporary git repo +# ======================================== +setup_test_dir +init_test_git_repo "$TEST_DIR/repo" + +# ======================================== +# Test 1: Round 0 - no commits since base, first round +# ======================================== +echo "Test 1: Round 0 - no commits, first round" + +CURRENT_ROUND=0 +BASE_COMMIT=$(git -C "$TEST_DIR/repo" rev-parse HEAD) + +# No commits since BASE_COMMIT..HEAD (same commit) +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +LOOP_TIMESTAMP="2026-01-01_00-00-00" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +if echo "$RESULT" | grep -q "(no commits yet)" && echo "$RESULT" | grep -q "(first round, no prior history)"; then + pass "Round 0 shows correct placeholders" +else + fail "Round 0 placeholders" "(no commits yet) and (first round, no prior history)" "$RESULT" +fi + +# ======================================== +# Test 2: Round 3 - with commits and round history +# ======================================== +echo "" +echo "Test 2: 
Round 3 - commits and round file references" + +# Make some commits +cd "$TEST_DIR/repo" +echo "feat1" > feat1.txt && git add feat1.txt && git commit -q -m "feat: add feature 1" +echo "feat2" > feat2.txt && git add feat2.txt && git commit -q -m "feat: add feature 2" +echo "fix1" > fix1.txt && git add fix1.txt && git commit -q -m "fix: resolve bug in feature 1" +cd - > /dev/null + +CURRENT_ROUND=3 +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +HAS_COMMITS=true +HAS_ROUNDS=true + +echo "$RESULT" | grep -q "feat: add feature 1" || HAS_COMMITS=false +echo "$RESULT" | grep -q "feat: add feature 2" || HAS_COMMITS=false +echo "$RESULT" | grep -q "fix: resolve bug in feature 1" || HAS_COMMITS=false + +echo "$RESULT" | grep -q "round-2-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-1-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-0-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-2-review-result.md" || HAS_ROUNDS=false + +if [[ "$HAS_COMMITS" == "true" ]]; then + pass "Round 3 shows all 3 commits" +else + fail "Round 3 commits" "3 commit messages" "$RESULT" +fi + +if [[ "$HAS_ROUNDS" == "true" ]]; then + pass "Round 3 shows round 0-2 file references" +else + fail "Round 3 round files" "round-0/1/2 summary and review files" "$RESULT" +fi + +# 
======================================== +# Test 3: Corrupted BASE_COMMIT - nonexistent object +# ======================================== +echo "" +echo "Test 3: Corrupted BASE_COMMIT graceful fallback" + +BAD_COMMIT="deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + +# Simulate the exact logic from the stop hook (merge-base --is-ancestor) +if [[ -n "$BAD_COMMIT" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$BAD_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BAD_COMMIT"..HEAD 2>/dev/null | tail -80) +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Corrupted BASE_COMMIT triggers annotation" +else + fail "Corrupted BASE_COMMIT annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +if echo "$COMMIT_HISTORY" | grep -q "feat: add feature"; then + pass "Corrupted BASE_COMMIT still shows recent commits" +else + fail "Corrupted BASE_COMMIT recent commits" "recent branch commits" "$COMMIT_HISTORY" +fi + +# Verify no crash (we got here = no set -e crash) +pass "Corrupted BASE_COMMIT did not crash (set -e safe)" + +# ======================================== +# Test 3b: Valid but unrelated commit (not ancestor of HEAD) +# ======================================== +echo "" +echo "Test 3b: Valid but unrelated BASE_COMMIT (orphan branch)" + +# Create an orphan branch with its own commit, then switch back +cd "$TEST_DIR/repo" +ORIG_BRANCH=$(git rev-parse --abbrev-ref HEAD) +git checkout -q --orphan orphan-test +echo "orphan" > orphan.txt && git add orphan.txt && git commit -q -m "orphan commit" +ORPHAN_COMMIT=$(git rev-parse HEAD) +git checkout -q "$ORIG_BRANCH" +cd - > /dev/null 
+ +# ORPHAN_COMMIT exists but is NOT an ancestor of HEAD +if [[ -n "$ORPHAN_COMMIT" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$ORPHAN_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY="should not reach here" +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Unrelated valid commit triggers annotation" +else + fail "Unrelated valid commit annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +# ======================================== +# Test 4: Missing template - fallback renders full section +# ======================================== +echo "" +echo "Test 4: Missing template fallback renders full section" + +CURRENT_ROUND=2 +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done + +# Use the exact fallback format from the stop hook +COMMIT_HISTORY_SECTION_FALLBACK="## Development History (Integral Context) +\`\`\` +${COMMIT_HISTORY} +\`\`\` +### Recent Round Files +Read these files before conducting your review to understand the trajectory of work: +${RECENT_ROUND_FILES}" + +# Point to a non-existent template to force fallback +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/non-existent-template.md" "$COMMIT_HISTORY_SECTION_FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +FALLBACK_OK=true +echo "$RESULT" | grep -q "Development History" || 
FALLBACK_OK=false +echo "$RESULT" | grep -q "feat: add feature 1" || FALLBACK_OK=false +echo "$RESULT" | grep -q "Recent Round Files" || FALLBACK_OK=false +echo "$RESULT" | grep -q "round-1-summary.md" || FALLBACK_OK=false +echo "$RESULT" | grep -q "round-0-review-result.md" || FALLBACK_OK=false +echo "$RESULT" | grep -q "Read these files" || FALLBACK_OK=false + +if [[ "$FALLBACK_OK" == "true" ]]; then + pass "Fallback renders full section with commits, round files, and directive" +else + fail "Fallback full section" "commits + round files + directive" "$RESULT" +fi + +# ======================================== +# Test 5: Round 1 - only 1 prior round (boundary) +# ======================================== +echo "" +echo "Test 5: Round 1 - only 1 prior round" + +CURRENT_ROUND=1 +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +if echo "$RECENT_ROUND_FILES" | grep -q "round-0-summary.md" && \ + ! 
echo "$RECENT_ROUND_FILES" | grep -q "round-1-"; then + pass "Round 1 references only round 0" +else + fail "Round 1 boundary" "only round-0 references" "$RECENT_ROUND_FILES" +fi + +# ======================================== +# Test 6: Empty BASE_COMMIT (legacy loop) +# ======================================== +echo "" +echo "Test 6: Empty BASE_COMMIT fallback" + +EMPTY_BASE="" +if [[ -n "$EMPTY_BASE" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$EMPTY_BASE" HEAD 2>/dev/null; then + COMMIT_HISTORY="should not reach here" +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Empty BASE_COMMIT triggers annotation" +else + fail "Empty BASE_COMMIT annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +# ======================================== +# Summary +# ======================================== +print_test_summary "Commit History Section (I Component) Tests" diff --git a/tests/test-config-error-handling.sh b/tests/test-config-error-handling.sh index 24c88e2d..4f328493 100755 --- a/tests/test-config-error-handling.sh +++ b/tests/test-config-error-handling.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for error handling in scripts/lib/config-loader.sh # diff --git a/tests/test-config-merge.sh b/tests/test-config-merge.sh index 2816653c..df877222 100755 --- a/tests/test-config-merge.sh +++ b/tests/test-config-merge.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for 4-layer config merge behavior in scripts/lib/config-loader.sh # diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh new file mode 100755 index 00000000..c240ad65 --- /dev/null +++ 
b/tests/test-disable-nested-codex-hooks.sh
@@ -0,0 +1,247 @@
+#!/usr/bin/env bash
+#
+# Regression test for the Humanize stop hook: every nested Codex call it makes
+# (codex exec in the implementation phase, codex review in the review phase)
+# must carry "--disable codex_hooks" so Codex's native hooks cannot recurse.
+#
+# A fake codex placed first on PATH records its argv; the real stop hook is
+# then run against a throwaway git repo and the recorded argv is inspected.
+#
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh"
+
+# Escape sequences for the coloured PASS/FAIL prefixes, and the tallies.
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+NC='\033[0m'
+TESTS_PASSED=0
+TESTS_FAILED=0
+
+# pass LABEL -- print a green PASS line and count it.
+pass() {
+    printf '%b\n' "${GREEN}PASS${NC}: $1"
+    (( TESTS_PASSED += 1 ))
+}
+
+# fail LABEL EXPECTED ACTUAL -- print a red FAIL line with details and count it.
+fail() {
+    printf '%b\n' "${RED}FAIL${NC}: $1"
+    printf '%s\n' " Expected: $2" " Got: $3"
+    (( TESTS_FAILED += 1 ))
+}
+
+printf '%s\n' \
+    "==========================================" \
+    "Disable Nested Codex Hooks Tests" \
+    "==========================================" \
+    ""
+
+# Scratch area for repos, mock binaries and recorded argv; removed on exit.
+TEST_DIR="$(mktemp -d)"
+trap 'rm -rf "$TEST_DIR"' EXIT
+
+# Point XDG_CACHE_HOME into the scratch area (presumably so cache writes made
+# while the hook runs stay out of the real user cache).
+export XDG_CACHE_HOME="$TEST_DIR/.cache"
+mkdir -p "$XDG_CACHE_HOME"
+
+# setup_repo DIR
+# Create a git repo at DIR holding one commit (.gitignore and init.txt) plus an
+# ignored plans/test-plan.md. Leaves the shell's working directory set to DIR.
+setup_repo() {
+    local repo="$1"
+
+    mkdir -p "$repo/plans"
+    cd "$repo"
+    git init -q
+    git config user.email "test@test.com"
+    git config user.name "Test User"
+    git config commit.gpgsign false
+
+    printf '%s\n' '.humanize/' 'plans/' '.cache/' > .gitignore
+    printf '%s\n' '# Test Plan' > plans/test-plan.md
+    printf '%s\n' 'init' > init.txt
+
+    git add .gitignore init.txt
+    git -c commit.gpgsign=false commit -q -m "initial"
+}
+
+# setup_mock_codex BIN_DIR ARGS_FILE
+# Install an executable fake codex in BIN_DIR. "--help" prints usage text that
+# lists --disable; any other call writes its argv to ARGS_FILE, then answers
+# the exec/review subcommands with canned output and rejects everything else.
+setup_mock_codex() {
+    local mock_bin="$1"
+    local argv_log="$2"
+    local mock="$mock_bin/codex"
+
+    mkdir -p "$mock_bin"
+    # Unquoted heredoc: $argv_log is expanded while writing the mock; every
+    # backslash-escaped \$ and backtick is emitted literally for the mock to use.
+    cat > "$mock" <<MOCK_CODEX
+#!/usr/bin/env bash
+# The stop hook probes feature support with \`codex --help\`; advertise
+# --disable so the nested invocation is expected to include it.
+if [[ "\$1" == "--help" ]]; then
+    cat <<HELP
+Usage: codex [OPTIONS] <COMMAND>
+
+Options:
+ --disable <HOOK> Disable a specific Codex hook (e.g. codex_hooks)
+ --skip-git-repo-check Skip git repo validation
+HELP
+    exit 0
+fi
+
+printf '%s\n' "\$*" > "$argv_log"
+
+subcommand=""
+for arg in "\$@"; do
+    if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then
+        subcommand="\$arg"
+        break
+    fi
+done
+
+if [[ "\$subcommand" == "exec" ]]; then
+    echo "Review: keep iterating."
+    exit 0
+fi
+
+if [[ "\$subcommand" == "review" ]]; then
+    echo "No issues found."
+    exit 0
+fi
+
+echo "unexpected codex args: \$*" >&2
+exit 1
+MOCK_CODEX
+    chmod +x "$mock"
+}
+
+# setup_loop_dir REPO_DIR REVIEW_STARTED
+# Populate REPO_DIR/.humanize/rlcr/2026-03-14_12-00-00 with state.md, plan.md,
+# goal-tracker.md and round-1-summary.md. REVIEW_STARTED ("true"/"false") is
+# written into state.md; "true" additionally creates .review-phase-started.
+setup_loop_dir() {
+    local repo="$1"
+    local in_review="$2"
+    local session="$repo/.humanize/rlcr/2026-03-14_12-00-00"
+    local branch head_sha
+
+    branch="$(git -C "$repo" rev-parse --abbrev-ref HEAD)"
+    head_sha="$(git -C "$repo" rev-parse HEAD)"
+
+    mkdir -p "$session"
+    cp "$repo/plans/test-plan.md" "$session/plan.md"
+
+    cat > "$session/state.md" <<STATE
+---
+current_round: 1
+max_iterations: 42
+plan_file: plans/test-plan.md
+plan_tracked: false
+start_branch: $branch
+base_branch: $branch
+base_commit: $head_sha
+push_every_round: false
+codex_model: gpt-5.5
+codex_effort: high
+codex_timeout: 120
+review_started: $in_review
+started_at: 2026-03-14T12:00:00Z
+ask_codex_question: false
+agent_teams: false
+---
+STATE
+
+    cat > "$session/goal-tracker.md" <<'TRACKER'
+# Goal Tracker
+## IMMUTABLE SECTION
+### Ultimate Goal
+Test nested codex disable
+### Acceptance Criteria
+- AC-1: Hook can run
+
+## MUTABLE SECTION
+### Active Tasks
+- Verify hook argv
+TRACKER
+
+    cat > "$session/round-1-summary.md" <<'SUMMARY'
+# Round Summary
+Implemented initial changes.
+SUMMARY
+
+    # The marker file is written only for a loop whose review has started.
+    case "$in_review" in
+        true) echo "build_finish_round=1" > "$session/.review-phase-started" ;;
+    esac
+}
+
+# run_loop_hook REPO_DIR ARGS_FILE REVIEW_STARTED
+# Install the mock codex and loop state, then feed "{}" to the stop hook with
+# the mock first on PATH. Sets OUTPUT (combined stdout/stderr) and EXIT_CODE,
+# and records a failure when the hook exits non-zero.
+run_loop_hook() {
+    local repo="$1"
+    local argv_log="$2"
+    local in_review="$3"
+    local mock_bin="$TEST_DIR/bin-${in_review}"
+
+    setup_mock_codex "$mock_bin" "$argv_log"
+    setup_loop_dir "$repo" "$in_review"
+
+    # The "||" branch captures a failing status without tripping set -e.
+    EXIT_CODE=0
+    OUTPUT=$(echo '{}' | PATH="$mock_bin:$PATH" CLAUDE_PROJECT_DIR="$repo" bash "$STOP_HOOK" 2>&1) || EXIT_CODE=$?
+
+    if (( EXIT_CODE == 0 )); then
+        return 0
+    fi
+    fail "loop hook completes in $in_review mode" "exit 0" "exit=$EXIT_CODE output=$OUTPUT"
+}
+
+# expect_disable_flag SUBCOMMAND ARGS_FILE LABEL
+# Pass when ARGS_FILE contains "SUBCOMMAND --disable codex_hooks"; otherwise
+# fail, reporting the recorded argv (or "missing" when the file is unreadable).
+expect_disable_flag() {
+    local needle="$1 --disable codex_hooks"
+    local argv_log="$2"
+    local label="$3"
+
+    if grep -q -- "$needle" "$argv_log"; then
+        pass "$label"
+        return 0
+    fi
+    fail "$label" "$needle" "$(cat "$argv_log" 2>/dev/null || echo missing)"
+}
+
+# Scenario 1: review_started=false, so the hook is expected to run codex exec.
+REPO_IMPL="$TEST_DIR/repo-impl"
+setup_repo "$REPO_IMPL"
+run_loop_hook "$REPO_IMPL" "$TEST_DIR/impl.args" "false"
+expect_disable_flag "exec" "$TEST_DIR/impl.args" \
+    "implementation-phase stop hook disables codex_hooks for codex exec"
+
+# Scenario 2: review_started=true, so the hook is expected to run codex review.
+REPO_REVIEW="$TEST_DIR/repo-review"
+setup_repo "$REPO_REVIEW"
+run_loop_hook "$REPO_REVIEW" "$TEST_DIR/review.args" "true"
+expect_disable_flag "review" "$TEST_DIR/review.args" \
+    "review-phase stop hook disables codex_hooks for codex review"
+
+printf '%s\n' \
+    "" \
+    "========================================" \
+    "Disable Nested Codex Hooks Tests" \
+    "========================================" \
+    "Passed: $TESTS_PASSED" \
+    "Failed: $TESTS_FAILED"
+
+# A non-zero exit status reports that at least one check failed.
+(( TESTS_FAILED == 0 )) || exit 1
diff --git a/tests/test-error-scenarios.sh b/tests/test-error-scenarios.sh
index c05c1d8a..65930b1b 100755
--- a/tests/test-error-scenarios.sh
+++ b/tests/test-error-scenarios.sh
@@ -1,4 +1,4 @@
-#!/bin/bash +#!/usr/bin/env bash # # Test error scenarios for template-loader.sh # diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 96890a41..03a3e408 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for Finalize Phase feature # @@ -55,13 +55,20 @@ setup_mock_codex() { local review_output="${2:-No issues found.}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - outputs the provided content -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Handle codex review command cat << 'REVIEWOUT' $review_output @@ -79,14 +86,21 @@ setup_mock_codex_with_tracking() { local review_output="${2:-No issues found.}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Track that codex was called echo "CODEX_WAS_CALLED" > "$TEST_DIR/codex_called.marker" -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then cat << 'REVIEWOUT' $review_output REVIEWOUT @@ -104,13 +118,20 @@ setup_mock_codex_review_failure() { local review_exit_code="${2:-1}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - fails on review command -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ 
"\$subcommand" == "exec" ]]; then cat << 'REVIEW' $exec_output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Simulate failure with non-zero exit echo "Error: Codex review failed" >&2 exit $review_exit_code @@ -126,13 +147,20 @@ setup_mock_codex_review_empty_stdout() { local exec_output="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - produces empty stdout on review -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $exec_output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Exit successfully but produce no output exit 0 fi @@ -191,7 +219,7 @@ setup_loop_dir() { --- current_round: $round max_iterations: $max_iter -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false @@ -200,6 +228,9 @@ plan_tracked: false start_branch: $current_branch base_branch: main review_started: false +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal started_at: 2024-01-01T12:00:00Z --- EOF @@ -223,6 +254,16 @@ Test finalize phase | Task | Target AC | Status | |------|-----------|--------| | Test | AC-1 | completed | +EOF + + cat > "$LOOP_DIR/round-${round}-contract.md" << EOF +# Round $round Contract + +- Mainline Objective: Verify finalize phase coverage +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: current round artifacts are complete EOF } @@ -366,6 +407,18 @@ else fail "Write validator finalize-state.md" "exit 2 with finalize error" "exit $EXIT_CODE, output: $RESULT" fi +echo "T-NEG-5aa: Write validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": 
"'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Write validator blocks finalize-phase round contract" +else + fail "Write validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "T-NEG-5b: Edit validator blocks finalize-state.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/finalize-state.md"}}' set +e @@ -378,6 +431,18 @@ else fail "Edit validator finalize-state.md" "exit 2 with finalize error" "exit $EXIT_CODE, output: $RESULT" fi +echo "T-NEG-5bb: Edit validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Edit validator blocks finalize-phase round contract" +else + fail "Edit validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "T-NEG-5c: Bash validator blocks finalize-state.md modification" HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > '$LOOP_DIR'/finalize-state.md"}}' set +e @@ -513,6 +578,8 @@ setup_test_repo setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 setup_mock_codex "All requirements met. +Mainline Progress Verdict: ADVANCED + COMPLETE" # Create summary for current round @@ -571,6 +638,8 @@ setup_test_repo setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 setup_mock_codex_review_failure "All requirements met. 
+Mainline Progress Verdict: ADVANCED + COMPLETE" 1 # Create summary for current round @@ -630,6 +699,8 @@ setup_test_repo setup_loop_dir 4 10 # current_round: 4, max_iterations: 10 setup_mock_codex_review_empty_stdout "All requirements met. +Mainline Progress Verdict: ADVANCED + COMPLETE" # Create summary for current round @@ -752,6 +823,8 @@ setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 # Create a mock Codex that outputs review feedback (not COMPLETE) setup_mock_codex "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues need to be addressed: - Issue 1: Fix the bug in function X - Issue 2: Add tests for edge case Y @@ -813,6 +886,158 @@ else fail "Review feedback in output" "output contains 'Issue 1' from Codex review" "output does not contain expected feedback" fi +echo "" +echo "=== T-POS-6 / T-NEG-10: Mainline Drift State Machine ===" +echo "" + +# T-POS-6: Two consecutive stalled rounds trigger drift recovery prompt +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +Mainline Progress Verdict: STALLED + +- Mainline gap: AC-1 still lacks a passing implementation path +- Blocking side issue: current approach keeps looping on the same failing path + +Please recover the mainline before trying again. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +Tried another implementation pass, but AC-1 is still not advancing. 
+EOF + +TRANSCRIPT_FILE="$TEST_DIR/transcript.jsonl" +cat > "$TRANSCRIPT_FILE" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "[mainline] Recover AC-1", "status": "completed", "activeForm": "Recovering AC-1"}]}}]}} +EOF + +echo "T-POS-6: Two stalled rounds trigger drift recovery prompt" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if echo "$RESULT" | grep -q '"decision".*block' && [[ -f "$LOOP_DIR/round-4-prompt.md" ]]; then + pass "Drift recovery round blocks exit and creates next prompt" +else + fail "Drift recovery prompt creation" "block with round-4 prompt" "exit $EXIT_CODE, output: $RESULT" +fi + +if grep -q "Drift Recovery Mode" "$LOOP_DIR/round-4-prompt.md"; then + pass "Drift recovery prompt uses special replan template" +else + fail "Drift recovery prompt template" "Drift Recovery Mode in prompt" "$(cat "$LOOP_DIR/round-4-prompt.md")" +fi + +parse_state_file "$LOOP_DIR/state.md" +if [[ "$STATE_CURRENT_ROUND" == "4" ]] && [[ "$STATE_MAINLINE_STALL_COUNT" == "2" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "State records drift recovery requirement after second stalled round" +else + fail "Drift recovery state update" "round=4 stall=2 verdict=stalled drift=replan_required" \ + "round=$STATE_CURRENT_ROUND stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + +# T-NEG-10a: Missing Mainline Progress Verdict blocks exit and preserves state +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" + 
+setup_mock_codex "## Review Feedback + +- Mainline gap: AC-1 still lacks a passing implementation path +- Blocking side issue: current approach keeps looping on the same failing path + +Please restate the mainline more clearly. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +Tried another implementation pass, but the review omitted the verdict line. +EOF + +echo "T-NEG-10a: Missing Mainline Progress Verdict blocks exit" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if echo "$RESULT" | grep -q '"decision".*block' && echo "$RESULT" | grep -qi "verdict"; then + pass "Missing Mainline Progress Verdict blocks exit" +else + fail "Missing Mainline Progress Verdict" "block with verdict error" "exit $EXIT_CODE, output: $RESULT" +fi + +if [[ ! -f "$LOOP_DIR/round-4-prompt.md" ]]; then + pass "Missing verdict does not generate next-round prompt" +else + fail "Missing verdict prompt generation" "no round-4 prompt" "$(cat "$LOOP_DIR/round-4-prompt.md")" +fi + +parse_state_file "$LOOP_DIR/state.md" +if [[ "$STATE_CURRENT_ROUND" == "3" ]] && [[ "$STATE_MAINLINE_STALL_COUNT" == "1" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "normal" ]]; then + pass "Missing verdict preserves prior drift state" +else + fail "Missing verdict state preservation" "round=3 stall=1 verdict=stalled drift=normal" \ + "round=$STATE_CURRENT_ROUND stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + +# T-NEG-10: Third consecutive stalled/regressed round stops the loop +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 2/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" +perl 
-0pi -e 's/drift_status: normal/drift_status: replan_required/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +Mainline Progress Verdict: REGRESSED + +- Mainline gap: this round moved farther from AC-1 +- Blocking side issue: recent fixes keep undoing the prior mainline path + +Stop and replan. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +The latest attempt regressed the mainline objective again. +EOF + +echo "T-NEG-10: Third stalled/regressed round triggers circuit breaker" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if [[ -f "$LOOP_DIR/stop-state.md" ]] && echo "$RESULT" | grep -qi "drift"; then + pass "Third stalled/regressed round stops the loop with drift message" +else + fail "Drift circuit breaker" "stop-state.md and drift message" "exit $EXIT_CODE, files: $(ls "$LOOP_DIR"/*state*.md 2>/dev/null || echo 'none'), output: $RESULT" +fi + +parse_state_file "$LOOP_DIR/stop-state.md" +if [[ "$STATE_MAINLINE_STALL_COUNT" == "3" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "regressed" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "Stopped loop preserves final drift state" +else + fail "Preserved drift state on stop" "stall=3 verdict=regressed drift=replan_required" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + echo "" echo "=== Validator Finalize Phase State Parsing Tests ===" echo "" @@ -850,6 +1075,18 @@ else fail "Read validator finalize-state.md parsing" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +echo "Test: Read validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Read validator blocks finalize-phase round contract" +else + fail "Read validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "Test: Plan-file validator parses finalize-state.md correctly" # The plan-file validator should not error when only finalize-state.md exists HOOK_INPUT='{"prompt": "test prompt"}' diff --git a/tests/test-gen-plan.sh b/tests/test-gen-plan.sh index e26ee42e..b5bcab07 100755 --- a/tests/test-gen-plan.sh +++ b/tests/test-gen-plan.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for gen-plan command structure validation # diff --git a/tests/test-helpers.sh b/tests/test-helpers.sh index a93a2329..2fafff3b 100644 --- a/tests/test-helpers.sh +++ b/tests/test-helpers.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Shared test helper functions for all test scripts # diff --git a/tests/test-humanize-escape.sh b/tests/test-humanize-escape.sh index 8731cc98..a46578d5 100755 --- a/tests/test-humanize-escape.sh +++ b/tests/test-humanize-escape.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for humanize-escape fixes # @@ -85,6 +85,9 @@ assert_blocks "git add ./.humanize/" "Block: ./.humanize/ with trailing slash" assert_blocks "git add ./.humanize/file.md" "Block: ./.humanize/file.md" assert_blocks "git add path/to/.humanize" "Block: path/to/.humanize" assert_blocks "git add ../project/.humanize" "Block: ../project/.humanize" +assert_blocks "git add .humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md" "Block: RLCR goal tracker path" +assert_blocks "git add .humanize/rlcr/2026-03-01_00-00-00/round-3-summary.md" "Block: RLCR round summary path" +assert_blocks "git add .humanize/rlcr/2026-03-01_00-00-00/round-3-contract.md" "Block: RLCR round contract path" # ======================================== # Test Group 2: Quoted Path Variants @@ -98,6 +101,7 @@ assert_blocks 
"git add '.humanize'" "Block: single-quoted .humanize" assert_blocks 'git add "./.humanize"' "Block: double-quoted ./.humanize" assert_blocks "git add './.humanize'" "Block: single-quoted ./.humanize" assert_blocks 'git add "path/to/.humanize"' "Block: double-quoted path/to/.humanize" +assert_blocks 'git add ".humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md"' "Block: double-quoted RLCR file path" # ======================================== # Test Group 3: Combined Force and Path Variants @@ -109,6 +113,7 @@ echo "" assert_blocks "git add -f ./.humanize" "Block: -f with ./.humanize" assert_blocks "git add --force ./.humanize" "Block: --force with ./.humanize" assert_blocks 'git add -f ".humanize"' "Block: -f with quoted .humanize" +assert_blocks "git add -f .humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md" "Block: -f with RLCR goal tracker" # Force flag with broad scope (blocks gitignore bypass) assert_blocks "git add -f ." "Block: -f . (force with current dir)" diff --git a/tests/test-model-router.sh b/tests/test-model-router.sh index 085b62da..bf4dc9a3 100755 --- a/tests/test-model-router.sh +++ b/tests/test-model-router.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Tests for model-router.sh set -euo pipefail @@ -20,7 +20,7 @@ create_mock_binary() { mkdir -p "$bin_dir" cat > "$bin_dir/$binary_name" <<EOF -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$bin_dir/$binary_name" diff --git a/tests/test-monitor-e2e-deletion.sh b/tests/test-monitor-e2e-deletion.sh index bbca2c6d..6197ba7e 100755 --- a/tests/test-monitor-e2e-deletion.sh +++ b/tests/test-monitor-e2e-deletion.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Monitor e2e deletion tests (parallel split 1/3) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -11,7 +11,6 @@ echo "" monitor_test_bash_deletion monitor_test_zsh_deletion -monitor_test_pr_deletion echo "" echo "========================================" diff --git a/tests/test-monitor-e2e-real.sh 
b/tests/test-monitor-e2e-real.sh index f60a7bf9..8a1c4f0e 100755 --- a/tests/test-monitor-e2e-real.sh +++ b/tests/test-monitor-e2e-real.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # TRUE End-to-End Monitor Tests for monitor tests # @@ -105,7 +105,7 @@ GOALTRACKER_EOF1 # Create the test runner script # This script runs the REAL _humanize_monitor_codex function cat > "$TEST_PROJECT/run_real_monitor.sh" << 'MONITOR_SCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_codex function PROJECT_DIR="$1" @@ -426,7 +426,7 @@ GOALTRACKER_SIGINT # Create the test runner script for SIGINT test cat > "$TEST_PROJECT_SIGINT/run_real_monitor_sigint.sh" << 'SIGINT_SCRIPT_EOF' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_codex function for SIGINT testing PROJECT_DIR="$1" @@ -685,317 +685,6 @@ ZSH_SIGINT_SCRIPT fi } -# ======================================== -# Test 5: Real _humanize_monitor_pr with directory deletion -# ======================================== -monitor_test_pr_deletion() { - echo "" - echo "Test 5: Real _humanize_monitor_pr with directory deletion" - echo "" - - # Create test project directory for PR monitor - TEST_PROJECT_PR="$TEST_BASE/project_pr" - mkdir -p "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create valid PR loop state.md file - cat > "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'STATE' -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -STATE - - # Create goal-tracker.md for PR loop - cat > "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << 'GOALTRACKER_EOF' -# PR Review Goal Tracker - -## PR Information -- PR Number: #123 -- Branch: test-branch -- Started: 2026-01-18T10:00:00Z - -## Issue Summary -| Round | 
Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | - -## Total Statistics -- Total Issues Found: 0 -- Remaining: 0 -GOALTRACKER_EOF - - # Create fake HOME for PR monitor test - FAKE_HOME_PR="$TEST_BASE/home_pr" - mkdir -p "$FAKE_HOME_PR" - - # Create cache directory for PR monitor - SANITIZED_PROJECT_PR=$(echo "$TEST_PROJECT_PR" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') - CACHE_DIR_PR="$FAKE_HOME_PR/.cache/humanize/$SANITIZED_PROJECT_PR/2026-01-18_12-00-00" - mkdir -p "$CACHE_DIR_PR" - echo "PR round 1 started" > "$CACHE_DIR_PR/round-1-codex-run.log" - - # Create bash test runner script for PR monitor - cat > "$TEST_PROJECT_PR/run_real_monitor_pr.sh" << 'MONITOR_SCRIPT' -#!/bin/bash -# Run the REAL _humanize_monitor_pr function - -PROJECT_DIR="$1" -PROJECT_ROOT="$2" -FAKE_HOME="$3" - -cd "$PROJECT_DIR" - -# Override HOME and XDG_CACHE_HOME -export HOME="$FAKE_HOME" -export XDG_CACHE_HOME="$FAKE_HOME/.cache" - -# Create shim functions for terminal commands -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} - -# Stub terminal control -printf() { - case "$1" in - *\\033*) : ;; # Ignore escape sequences - *) builtin printf "$@" ;; - esac -} - -# Source the humanize script (loads all functions) -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Override _pr_cleanup for testing -_pr_cleanup() { - echo "CLEANUP_CALLED_PR" -} - -# Start monitor with --once flag (single iteration) -# Then delete directory after brief delay -( - sleep 0.5 - rm -rf "$PROJECT_DIR/.humanize/pr-loop/2026-01-18_12-00-00" -) & -cleanup_pid=$! - -# Run monitor in foreground (will detect deletion) -humanize monitor pr --once 2>&1 - -echo "EXIT_CODE:$?" 
- -# Cleanup background process -kill $cleanup_pid 2>/dev/null || true -wait $cleanup_pid 2>/dev/null || true -MONITOR_SCRIPT - - chmod +x "$TEST_PROJECT_PR/run_real_monitor_pr.sh" - - # Run the PR monitor test - output_pr=$("$TEST_PROJECT_PR/run_real_monitor_pr.sh" "$TEST_PROJECT_PR" "$PROJECT_ROOT" "$FAKE_HOME_PR" 2>&1) || true - - # Verify: PR monitor e2e - graceful exit - if echo "$output_pr" | grep -qE 'Stopped|gracefully|EXIT_CODE:0'; then - pass "PR monitor e2e - graceful exit on directory deletion" - else - # Alternative: check for any clean exit indication - if echo "$output_pr" | grep -q "EXIT_CODE:0"; then - pass "PR monitor e2e - clean exit" - else - fail "PR monitor e2e" "Expected graceful stop or EXIT_CODE:0, got: $output_pr" - fi - fi - - # Verify no glob errors in PR monitor output - if echo "$output_pr" | grep -qE 'no matches found|bad pattern'; then - fail "PR monitor glob errors" "Found glob errors: $(echo "$output_pr" | grep -E 'no matches found|bad pattern')" - else - pass "PR monitor no glob errors" - fi -} - -# ======================================== -# Test 6: Real _humanize_monitor_pr without --once with SIGINT -# ======================================== -monitor_test_pr_sigint() { - echo "" - echo "Test 6: Real _humanize_monitor_pr without --once with SIGINT" - echo "" - - # Create test project directory for PR monitor without --once - TEST_PROJECT_PR_NO_ONCE="$TEST_BASE/project_pr_no_once" - mkdir -p "$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00" - - # Create valid PR loop state.md file - cat > "$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00/state.md" << 'STATE' -current_round: 1 -max_iterations: 42 -pr_number: 456 -start_branch: test-branch-no-once -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 2 -poll_timeout: 60 -started_at: 2026-01-18T13:00:00Z -STATE - - # Create goal-tracker.md for PR loop - cat > 
"$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00/goal-tracker.md" << 'PR_GOAL_EOF' -# PR Review Goal Tracker - -## PR Information -- PR Number: #456 -- Branch: test-branch-no-once -- Started: 2026-01-18T13:00:00Z - -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | - -## Total Statistics -- Total Issues Found: 0 -- Remaining: 0 -PR_GOAL_EOF - - # Create fake HOME for PR monitor test without --once - FAKE_HOME_PR_NO_ONCE="$TEST_BASE/home_pr_no_once" - mkdir -p "$FAKE_HOME_PR_NO_ONCE" - - # Create cache directory for PR monitor - SANITIZED_PROJECT_PR_NO_ONCE=$(echo "$TEST_PROJECT_PR_NO_ONCE" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') - CACHE_DIR_PR_NO_ONCE="$FAKE_HOME_PR_NO_ONCE/.cache/humanize/$SANITIZED_PROJECT_PR_NO_ONCE/2026-01-18_13-00-00" - mkdir -p "$CACHE_DIR_PR_NO_ONCE" - echo "PR round 1 started" > "$CACHE_DIR_PR_NO_ONCE/round-1-codex-run.log" - - # Create bash test runner script for PR monitor without --once - cat > "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" << 'PR_NO_ONCE_EOF' -#!/bin/bash -# Run the REAL _humanize_monitor_pr function WITHOUT --once flag - -PROJECT_DIR="$1" -PROJECT_ROOT="$2" -FAKE_HOME="$3" - -cd "$PROJECT_DIR" - -# Override HOME and XDG_CACHE_HOME -export HOME="$FAKE_HOME" -export XDG_CACHE_HOME="$FAKE_HOME/.cache" - -# Create shim functions for terminal commands -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} - -# Stub terminal control -printf() { - case "$1" in - *\\033*) : ;; # Ignore escape sequences - *) builtin printf "$@" ;; - esac -} - -# Source the humanize script (loads all functions) -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Run monitor in foreground WITHOUT --once flag -# This runs the actual poll loop (not just one iteration) -humanize monitor pr 2>&1 -exit_code=$? 
- -echo "EXIT_CODE:$exit_code" -PR_NO_ONCE_EOF - - chmod +x "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" - - # Run the PR monitor in background (no --once means it will loop until interrupted) - OUTPUT_FILE_PR_NO_ONCE="$TEST_BASE/output_pr_no_once.txt" - bash "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" "$TEST_PROJECT_PR_NO_ONCE" "$PROJECT_ROOT" "$FAKE_HOME_PR_NO_ONCE" > "$OUTPUT_FILE_PR_NO_ONCE" 2>&1 & - MONITOR_PID_PR_NO_ONCE=$! - - # Wait for monitor to start running its poll loop - sleep 3 - - # Verify monitor is running before sending SIGINT - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - # Send SIGINT to stop the continuous monitor (simulates Ctrl+C) - # Using negative PID sends to entire process group - kill -INT -$MONITOR_PID_PR_NO_ONCE 2>/dev/null || kill -INT $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - - # Wait for monitor to exit gracefully after SIGINT - WAIT_COUNT=0 - while kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null && [[ $WAIT_COUNT -lt 20 ]]; do - sleep 0.5 - WAIT_COUNT=$((WAIT_COUNT + 1)) - done - - # Force kill if still running - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - # Try SIGTERM before SIGKILL - kill -TERM $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - sleep 1 - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - kill -9 $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - fi - wait $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - # Still count as pass if the monitor ran and was terminated (SIGINT delivery is complex) - pass "PR monitor (no --once) handled via SIGTERM" - else - wait $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - pass "PR monitor (no --once) exited after SIGINT" - fi - else - fail "PR monitor (no --once) start" "Monitor did not start properly" - fi - - # Read captured output - output_pr_no_once=$(cat "$OUTPUT_FILE_PR_NO_ONCE" 2>/dev/null || echo "") - - # Verify clean exit after SIGINT - if echo "$output_pr_no_once" | grep -qE 'Stopped|Monitor stopped|EXIT_CODE:[01]'; then - pass "PR 
monitor (no --once) clean SIGINT exit" - else - # Check for any indication the monitor ran properly before SIGINT - if echo "$output_pr_no_once" | grep -qE 'PR|loop|Waiting|session'; then - pass "PR monitor (no --once) ran before SIGINT" - else - fail "PR monitor (no --once) SIGINT cleanup" "Expected cleanup message, got: $(head -c 300 <<< "$output_pr_no_once" | tr '\n' ' ')" - fi - fi - - # Verify no glob errors in PR monitor output - if echo "$output_pr_no_once" | grep -qE 'no matches found|bad pattern'; then - fail "PR monitor (no --once) glob errors" "Found glob errors" - else - pass "PR monitor (no --once) no glob errors" - fi -} - # ======================================== # Run all tests and print summary when executed directly # ======================================== @@ -1009,8 +698,6 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then monitor_test_zsh_deletion monitor_test_bash_sigint monitor_test_zsh_sigint - monitor_test_pr_deletion - monitor_test_pr_sigint # Summary echo "" @@ -1029,7 +716,6 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then echo "VERIFIED: Terminal state restored" echo "VERIFIED: Works in bash and zsh" echo "VERIFIED: Real SIGINT/Ctrl+C handling (bash and zsh)" - echo "VERIFIED: PR monitor e2e works (with and without --once)" exit 0 else echo "" diff --git a/tests/test-monitor-e2e-sigint.sh b/tests/test-monitor-e2e-sigint.sh index 9a354a2b..92aea080 100755 --- a/tests/test-monitor-e2e-sigint.sh +++ b/tests/test-monitor-e2e-sigint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Monitor e2e SIGINT tests (parallel split 2/3) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -11,7 +11,6 @@ echo "" monitor_test_bash_sigint monitor_test_zsh_sigint -monitor_test_pr_sigint echo "" echo "========================================" diff --git a/tests/test-monitor-runtime.sh b/tests/test-monitor-runtime.sh index f73256c0..e146adaf 100755 --- a/tests/test-monitor-runtime.sh +++ b/tests/test-monitor-runtime.sh @@ -1,4 
+1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Runtime Verification Tests for tests # @@ -63,7 +63,7 @@ echo "current_round: 1" > .humanize/rlcr/2026-01-16_10-00-00/state.md # Create a test script that sources humanize.sh and tests the graceful stop behavior cat > test_graceful_stop.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cd "$1" # Source the monitor script @@ -141,7 +141,7 @@ echo "Test 2: Verify cleanup prevents double execution" echo "" cat > test_double_cleanup.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cleanup_done=false call_count=0 @@ -184,7 +184,7 @@ echo "Test 3: Main loop directory deletion detection" echo "" cat > test_loop_detection.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cd "$1" loop_dir=".humanize/rlcr" @@ -261,7 +261,7 @@ echo "" # and would reset the scroll region cat > test_terminal_restore.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Test that _restore_terminal is defined and callable cd "$1" @@ -331,7 +331,7 @@ echo "Test 6: SIGINT triggers cleanup in bash" echo "" cat > test_sigint_bash.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Test that SIGINT triggers cleanup in bash mode cleanup_done=false diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index d2e8af6f..ec3eac06 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for plan file hooks during RLCR loop # @@ -40,7 +40,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-hooks.sh if [[ "$1" == "exec" ]]; then echo "Mock review output" @@ -60,6 +60,21 @@ setup_mock_codex # Default branch name (set after first git init) DEFAULT_BRANCH="" +create_round_contract() { + local loop_dir="$1" + local round="$2" + + cat > "$loop_dir/round-${round}-contract.md" << EOF +# Round $round Contract + +- Mainline 
Objective: Keep plan-file integrity checks aligned +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: current round artifacts are present and coherent +EOF +} + setup_test_loop() { cd "$TEST_DIR" @@ -80,6 +95,7 @@ setup_test_loop() { # Create loop directory structure LOOP_DIR="$TEST_DIR/.humanize/rlcr/2024-01-01_12-00-00" + rm -rf "$LOOP_DIR" mkdir -p "$LOOP_DIR" # Create plan file (gitignored) @@ -91,7 +107,12 @@ Test the RLCR loop ## Requirements - Requirement 1 EOF - echo "plans/" >> .gitignore + cat >> .gitignore << 'EOF' +plans/ +.humanize* +.cache/ +bin/ +EOF git add .gitignore git -c commit.gpgsign=false commit -q -m "Add gitignore" @@ -109,8 +130,13 @@ plan_tracked: false start_branch: $CURRENT_BRANCH base_branch: $CURRENT_BRANCH review_started: false +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal --- EOF + + create_round_contract "$LOOP_DIR" 0 } echo "=== Test: UserPromptSubmit Hook ===" @@ -466,6 +492,38 @@ else fail "Stop hook YAML parsing" "no YAML parse errors" "output: $RESULT" fi +# Test 8.8b: Stop hook blocks when round contract is missing +echo "Test 8.8b: Stop hook blocks when round contract is missing" +setup_test_loop +rm -f "$LOOP_DIR/round-0-contract.md" +cat > "$LOOP_DIR/round-0-summary.md" << 'EOF' +# Summary +Work done. +EOF +cat > "$LOOP_DIR/goal-tracker.md" << 'EOF' +# Goal Tracker +## IMMUTABLE SECTION +### Ultimate Goal +Test goal +### Acceptance Criteria +- Criterion 1 +## MUTABLE SECTION +### Plan Version: 1 (Updated: Round 0) +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| Task 1 | AC1 | done | - | +EOF +set +e +RESULT=$(echo '{}' | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if echo "$RESULT" | grep -q '"decision"' && echo "$RESULT" | grep -qi "contract"; then + pass "Stop hook blocks when round contract is missing" +else + fail "Stop hook missing round contract" "block with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 8.9: Hook handles plan_file path with hyphens correctly echo "Test 8.9: Hook handles plan_file with hyphens in path" setup_test_loop @@ -642,6 +700,7 @@ cat > "$TRACKED_LOOP_DIR/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$TRACKED_LOOP_DIR" 0 cat > "$TRACKED_LOOP_DIR/goal-tracker.md" << 'EOF' # Goal Tracker ## IMMUTABLE SECTION @@ -738,6 +797,7 @@ cat > "$TRACKED_LOOP_DIR/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$TRACKED_LOOP_DIR" 0 cat > "$TRACKED_LOOP_DIR/goal-tracker.md" << 'EOF' # Goal Tracker ## IMMUTABLE SECTION @@ -822,6 +882,7 @@ cat > "$LOOP_DIR_14_1/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_1" 0 # Goal tracker with ONLY Ultimate Goal placeholder (AC and Tasks are filled) cat > "$LOOP_DIR_14_1/goal-tracker.md" << 'EOF' # Goal Tracker @@ -893,6 +954,7 @@ cat > "$LOOP_DIR_14_2/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_2" 0 # Goal tracker with ONLY AC placeholder (Goal and Tasks are filled) cat > "$LOOP_DIR_14_2/goal-tracker.md" << 'EOF' # Goal Tracker @@ -964,6 +1026,7 @@ cat > "$LOOP_DIR_14_3/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_3" 0 # Goal tracker with ONLY Active Tasks placeholder (Goal and AC are filled) cat > "$LOOP_DIR_14_3/goal-tracker.md" << 'EOF' # Goal Tracker @@ -1033,6 +1096,7 @@ cat > "$LOOP_DIR_14_4/round-0-summary.md" << 'EOF' # Summary Work done. 
EOF +create_round_contract "$LOOP_DIR_14_4" 0 # Goal tracker with ALL placeholders cat > "$LOOP_DIR_14_4/goal-tracker.md" << 'EOF' # Goal Tracker diff --git a/tests/test-plan-file-validation.sh b/tests/test-plan-file-validation.sh index e439af0c..a6b9cc40 100755 --- a/tests/test-plan-file-validation.sh +++ b/tests/test-plan-file-validation.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for plan file validation in setup-rlcr-loop.sh # @@ -74,7 +74,7 @@ EOF mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-validation.sh echo "mock codex" EOF @@ -717,7 +717,7 @@ fi echo "Test 13: Reject codex effort with YAML-unsafe characters" rm -rf "$TEST_DIR/.humanize/rlcr" 2>/dev/null || true set +e -RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.4:high#comment" "plans/test-plan.md" 2>&1) +RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.5:high#comment" "plans/test-plan.md" 2>&1) EXIT_CODE=$? set -e if [[ $EXIT_CODE -ne 0 ]] && echo "$RESULT" | grep -q "Invalid codex effort"; then @@ -729,7 +729,7 @@ fi # Test 14: Accept valid codex model with dots and hyphens echo "Test 14: Accept valid codex model (alphanumeric, dots, hyphens)" set +e -RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.4:medium" "plans/test-plan.md" 2>&1) +RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.5:medium" "plans/test-plan.md" 2>&1) EXIT_CODE=$? set -e # Should not fail due to model/effort validation (may fail later for other reasons) diff --git a/tests/test-pr-loop-1-scripts.sh b/tests/test-pr-loop-1-scripts.sh deleted file mode 100755 index a4088b71..00000000 --- a/tests/test-pr-loop-1-scripts.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# -# PR Loop Script Tests Runner (parallel split 1/3) -# -# Runs only script argument validation tests from the PR loop test suite. 
-# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-scripts.sh" - -run_script_tests - -print_test_summary "PR Loop Script Tests" -exit $? diff --git a/tests/test-pr-loop-2-hooks.sh b/tests/test-pr-loop-2-hooks.sh deleted file mode 100755 index 254bdbdb..00000000 --- a/tests/test-pr-loop-2-hooks.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# -# PR Loop Hook Tests Runner (parallel split 2/3) -# -# Runs only hook functionality tests from the PR loop test suite. -# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-hooks.sh" - -run_hook_tests - -print_test_summary "PR Loop Hook Tests" -exit $? diff --git a/tests/test-pr-loop-3-stophook.sh b/tests/test-pr-loop-3-stophook.sh deleted file mode 100755 index 2aaa9759..00000000 --- a/tests/test-pr-loop-3-stophook.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# -# PR Loop Stop Hook Tests Runner (parallel split 3/3) -# -# Runs only stop hook integration tests from the PR loop test suite. -# This is the slowest module due to timeout-based bot polling tests. -# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then - skip "PR Loop Stop Hook Tests" "Skipped in GitHub Actions" - print_test_summary "PR Loop Stop Hook Tests" - exit 0 -fi - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-stophook.sh" - -run_stophook_tests - -print_test_summary "PR Loop Stop Hook Tests" -exit $? diff --git a/tests/test-pr-loop-hooks.sh b/tests/test-pr-loop-hooks.sh deleted file mode 100644 index 1e8c6ca1..00000000 --- a/tests/test-pr-loop-hooks.sh +++ /dev/null @@ -1,1623 +0,0 @@ -#!/bin/bash -# -# PR Loop Hook Tests -# -# Tests for hook functionality: -# - Validators and protections -# - Comment processing -# - E2E tests -# - Fixture-based tests -# - Monitor tests -# -# Usage: source test-pr-loop-hooks.sh && run_hook_tests -# - -run_hook_tests() { -# ======================================== -# PR Loop Validator Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing PR Loop Validators" -echo "========================================" -echo "" - -# Test: active_bots is stored as YAML list -test_active_bots_yaml_format() { - cd "$TEST_DIR" - - # Create mock git repo - init_test_git_repo "$TEST_DIR/repo" - cd "$TEST_DIR/repo" - - # Create PR loop state file with proper YAML format - local timestamp="2026-01-18_13-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T13:00:00Z ---- -EOF - - # Verify state file has YAML list format - if grep -q "^ - claude$" "$loop_dir/state.md" && \ - grep -q "^ - codex$" "$loop_dir/state.md"; then - pass "T-POS-12: active_bots is stored as YAML 
list format" - else - fail "T-POS-12: active_bots should be stored as YAML list format" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop state file is protected from writes -test_pr_loop_state_protected() { - cd "$TEST_DIR" - - # Create mock loop directory - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator blocks state.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/state.md", "content": "malicious content"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "state.*blocked\|pr loop"; then - pass "T-SEC-1: PR loop state.md is protected from writes" - else - fail "T-SEC-1: PR loop state.md should be protected from writes" "exit=2, blocked" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop comment file is protected from writes -test_pr_loop_comment_protected() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator blocks pr-comment.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-comment.md", "content": "fake comments"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]]; then - pass "T-SEC-2: PR loop pr-comment file is protected from writes" - else - fail "T-SEC-2: PR loop pr-comment file should be protected from writes" "exit=2" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop resolve file is allowed for writes -test_pr_loop_resolve_allowed() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator allows pr-resolve.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-resolve.md", "content": "resolution summary"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 0 ]]; then - pass "T-POS-13: PR loop pr-resolve file is allowed for writes" - else - fail "T-POS-13: PR loop pr-resolve file should be allowed for writes" "exit=0" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Run validator tests -test_active_bots_yaml_format -test_pr_loop_state_protected -test_pr_loop_comment_protected -test_pr_loop_resolve_allowed - -# Test: PR loop Bash protection works without RLCR loop -test_pr_loop_bash_protection_no_rlcr() { - cd "$TEST_DIR" - - # Ensure NO RLCR loop exists - rm -rf ".humanize/rlcr" - - local timestamp="2026-01-18_14-30-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 456 ---- -EOF - - # Test that Bash validator blocks state.md modifications via echo redirect - local hook_input='{"tool_name": "Bash", "tool_input": {"command": "echo bad > '$TEST_DIR'/.humanize/pr-loop/'$timestamp'/state.md"}}' - - local 
output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "state\|blocked\|pr loop"; then - pass "T-SEC-4: PR loop Bash protection works without RLCR loop" - else - fail "T-SEC-4: PR loop Bash protection should work without RLCR" "exit=2, blocked" "exit=$exit_code, output=$output" - fi - - cd "$SCRIPT_DIR" -} - -test_pr_loop_bash_protection_no_rlcr - -# ======================================== -# Comment Sorting Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Comment Sorting (fromdateiso8601)" -echo "========================================" -echo "" - -# Test: Timestamps are properly sorted (newest first) -test_timestamp_sorting() { - # Test that jq fromdateiso8601 works correctly - local sorted_output - sorted_output=$(echo '[ - {"created_at": "2026-01-18T10:00:00Z", "author_type": "User"}, - {"created_at": "2026-01-18T12:00:00Z", "author_type": "User"}, - {"created_at": "2026-01-18T11:00:00Z", "author_type": "User"} - ]' | jq 'sort_by(-(.created_at | fromdateiso8601)) | .[0].created_at') - - if [[ "$sorted_output" == '"2026-01-18T12:00:00Z"' ]]; then - pass "T-SORT-1: Comments are sorted newest first using fromdateiso8601" - else - fail "T-SORT-1: Comments should be sorted newest first" "12:00:00Z first" "got $sorted_output" - fi -} - -# Test: Human comments come before bot comments -test_human_before_bot_sorting() { - local sorted_output - sorted_output=$(echo '[ - {"created_at": "2026-01-18T12:00:00Z", "author_type": "Bot"}, - {"created_at": "2026-01-18T11:00:00Z", "author_type": "User"} - ]' | jq 'sort_by( - (if .author_type == "Bot" then 1 else 0 end), - -(.created_at | fromdateiso8601) - ) | .[0].author_type') - - if [[ "$sorted_output" == '"User"' ]]; then - pass "T-SORT-2: Human comments come before bot comments" - else - fail 
"T-SORT-2: Human comments should come before bot comments" "User first" "got $sorted_output" - fi -} - -# Run sorting tests -test_timestamp_sorting -test_human_before_bot_sorting - -# ======================================== -# Gate-keeper Logic Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Gate-keeper Logic" -echo "========================================" -echo "" - -# Test: Comment deduplication by ID (unit test) -test_comment_deduplication() { - # Test that jq unique_by works for deduplication - local deduped_output - deduped_output=$(echo '[ - {"id": 1, "body": "first"}, - {"id": 2, "body": "second"}, - {"id": 1, "body": "duplicate of first"} - ]' | jq 'unique_by(.id) | length') - - if [[ "$deduped_output" == "2" ]]; then - pass "T-GATE-1: Comments are deduplicated by ID" - else - fail "T-GATE-1: Comments should be deduplicated by ID" "2 unique" "got $deduped_output" - fi -} - -# Test: YAML list parsing for configured_bots -test_configured_bots_parsing() { - local test_state="--- -current_round: 0 -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 ----" - - # Extract configured_bots using same logic as stop hook - local configured_bots="" - local in_field=false - while IFS= read -r line; do - if [[ "$line" =~ ^configured_bots: ]]; then - in_field=true - continue - fi - if [[ "$in_field" == "true" ]]; then - if [[ "$line" =~ ^[[:space:]]+-[[:space:]]+ ]]; then - local bot_name="${line#*- }" - bot_name=$(echo "$bot_name" | tr -d ' ') - configured_bots="${configured_bots}${bot_name}," - elif [[ "$line" =~ ^[a-zA-Z_] ]]; then - in_field=false - fi - fi - done <<< "$test_state" - - if [[ "$configured_bots" == "claude,codex," ]]; then - pass "T-GATE-2: configured_bots YAML list is parsed correctly" - else - fail "T-GATE-2: configured_bots parsing failed" "claude,codex," "got $configured_bots" - fi -} - -# Test: Bot status extraction from Codex output 
-test_bot_status_extraction() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | No issues found | -| codex | ISSUES | Found bug in line 42 | - -### Approved Bots -- claude" - - # Extract bots with ISSUES status using same logic as stop hook - local bots_with_issues="" - while IFS= read -r line; do - if echo "$line" | grep -qiE '\|[[:space:]]*ISSUES[[:space:]]*\|'; then - local bot=$(echo "$line" | sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - bots_with_issues="${bots_with_issues}${bot}," - fi - done <<< "$codex_output" - - if [[ "$bots_with_issues" == "codex," ]]; then - pass "T-GATE-3: Bots with ISSUES status are correctly identified" - else - fail "T-GATE-3: Bot status extraction failed" "codex," "got $bots_with_issues" - fi -} - -# Test: Bot re-add logic when previously approved bot has new issues -test_bot_readd_logic() { - # Simulate: claude was approved (removed from active), but now has ISSUES - local configured_bots=("claude" "codex") - local active_bots=("codex") # claude was removed (approved) - - # Codex output shows claude now has issues - declare -A bots_with_issues - bots_with_issues["claude"]="true" - - declare -A bots_approved - # No bots approved this round - - # Re-add logic: process ALL configured bots - local new_active=() - for bot in "${configured_bots[@]}"; do - if [[ "${bots_with_issues[$bot]:-}" == "true" ]]; then - new_active+=("$bot") - fi - done - - # claude should be re-added because it has issues - local found_claude=false - for bot in "${new_active[@]}"; do - if [[ "$bot" == "claude" ]]; then - found_claude=true - break - fi - done - - if [[ "$found_claude" == "true" ]]; then - pass "T-GATE-4: Previously approved bot is re-added when it has new issues" - else - fail "T-GATE-4: Bot re-add logic failed" "claude in new_active" "not found" - fi -} - -# Test: Trigger comment timestamp detection pattern -test_trigger_comment_detection() { - local comments='[ - {"id": 1, 
"body": "Just a regular comment", "created_at": "2026-01-18T10:00:00Z"}, - {"id": 2, "body": "@claude @codex please review", "created_at": "2026-01-18T11:00:00Z"}, - {"id": 3, "body": "Another comment", "created_at": "2026-01-18T12:00:00Z"} - ]' - - # Build pattern for @bot mentions - local bot_pattern="@claude|@codex" - - # Find most recent trigger comment - local trigger_ts - trigger_ts=$(echo "$comments" | jq -r --arg pattern "$bot_pattern" ' - [.[] | select(.body | test($pattern; "i"))] | - sort_by(.created_at) | reverse | .[0].created_at // empty - ') - - if [[ "$trigger_ts" == "2026-01-18T11:00:00Z" ]]; then - pass "T-GATE-5: Trigger comment timestamp is correctly detected" - else - fail "T-GATE-5: Trigger timestamp detection failed" "2026-01-18T11:00:00Z" "got $trigger_ts" - fi -} - -# Test: APPROVE marker detection in Codex output -test_approve_marker_detection() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | LGTM | - -### Final Recommendation -All bots have approved. - -APPROVE" - - local last_line - last_line=$(echo "$codex_output" | grep -v '^[[:space:]]*$' | tail -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - if [[ "$last_line" == "APPROVE" ]]; then - pass "T-GATE-6: APPROVE marker is correctly recognized" - else - fail "T-GATE-6: APPROVE marker detection failed" "APPROVE" "got $last_line" - fi -} - -# Test: WAITING_FOR_BOTS marker detection -test_waiting_for_bots_marker() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | NO_RESPONSE | Bot did not respond | - -### Final Recommendation -Some bots have not responded yet. 
- -WAITING_FOR_BOTS" - - local last_line - last_line=$(echo "$codex_output" | grep -v '^[[:space:]]*$' | tail -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - if [[ "$last_line" == "WAITING_FOR_BOTS" ]]; then - pass "T-GATE-7: WAITING_FOR_BOTS marker is correctly recognized" - else - fail "T-GATE-7: WAITING_FOR_BOTS marker detection failed" "WAITING_FOR_BOTS" "got $last_line" - fi -} - -# Run gate-keeper tests -test_comment_deduplication -test_configured_bots_parsing -test_bot_status_extraction -test_bot_readd_logic -test_trigger_comment_detection -test_approve_marker_detection -test_waiting_for_bots_marker - -# ======================================== -# Stop Hook Integration Tests (with mocked gh/codex) -# ======================================== - -echo "" -echo "========================================" -echo "Testing Stop Hook Integration" -echo "========================================" -echo "" - -# Create enhanced mock gh that returns trigger comments -create_enhanced_mock_gh() { - local mock_dir="$1" - local trigger_user="${2:-testuser}" - local trigger_timestamp="${3:-2026-01-18T12:00:00Z}" - - cat > "$mock_dir/gh" << MOCK_GH -#!/bin/bash -# Enhanced mock gh CLI for stop hook testing - -case "\$1" in - auth) - if [[ "\$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "\$2" == "view" ]]; then - if [[ "\$3" == "--json" && "\$4" == "owner" ]]; then - echo '{"login": "testowner"}' - elif [[ "\$3" == "--json" && "\$4" == "name" ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"number"* ]]; then - echo '{"number": 123}' - elif [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - fi - exit 0 - fi - ;; - api) - # Handle user endpoint for current user - if [[ "\$2" == "user" ]]; then - echo '{"login": "${trigger_user}"}' - exit 0 - fi - # Handle PR comments endpoint - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id": 1, "user": 
{"login": "${trigger_user}"}, "created_at": "${trigger_timestamp}", "body": "@claude @codex please review"}]' - exit 0 - fi - # Return empty arrays for other endpoints - echo "[]" - exit 0 - ;; -esac - -echo "Mock gh: unhandled command: \$*" >&2 -exit 1 -MOCK_GH - chmod +x "$mock_dir/gh" -} - -# Test: Trigger comment detection filters by current user -test_trigger_user_filter() { - local test_subdir="$TEST_DIR/stop_hook_user_test" - mkdir -p "$test_subdir" - - # Create mock that returns comments from different users - cat > "$test_subdir/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo '{"login": "myuser"}' - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[ - {"id": 1, "user": {"login": "otheruser"}, "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}, - {"id": 2, "user": {"login": "myuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}, - {"id": 3, "user": {"login": "otheruser"}, "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"} - ]' - exit 0 - fi - echo "[]" - exit 0 - ;; -esac -exit 1 -MOCK_GH - chmod +x "$test_subdir/gh" - - # Test the jq filter logic - local comments='[ - {"id": 1, "author": "otheruser", "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}, - {"id": 2, "author": "myuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}, - {"id": 3, "author": "otheruser", "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"} - ]' - - local trigger_ts - trigger_ts=$(echo "$comments" | jq -r --arg pattern "@claude" --arg user "myuser" ' - [.[] | select(.author == $user and (.body | test($pattern; "i")))] | - sort_by(.created_at) | reverse | .[0].created_at // empty - ') - - if [[ "$trigger_ts" == "2026-01-18T12:00:00Z" ]]; then - pass "T-HOOK-1: Trigger detection filters by current user" - else - fail "T-HOOK-1: Trigger should be from myuser only" "2026-01-18T12:00:00Z" "got 
$trigger_ts" - fi -} - -# Test: Trigger timestamp refresh when newer exists -test_trigger_refresh() { - local old_trigger="2026-01-18T10:00:00Z" - local new_trigger="2026-01-18T12:00:00Z" - - # Simulate the refresh logic from stop hook - local should_update=false - if [[ -z "$old_trigger" ]] || [[ "$new_trigger" > "$old_trigger" ]]; then - should_update=true - fi - - if [[ "$should_update" == "true" ]]; then - pass "T-HOOK-2: Trigger timestamp refreshes when newer comment exists" - else - fail "T-HOOK-2: Should update trigger when newer" "update" "no update" - fi -} - -# Test: Missing trigger blocks exit for round > 0 -test_missing_trigger_blocks() { - local current_round=1 - local last_trigger_at="" - - # Simulate the check from stop hook - local should_block=false - if [[ "$current_round" -gt 0 && -z "$last_trigger_at" ]]; then - should_block=true - fi - - if [[ "$should_block" == "true" ]]; then - pass "T-HOOK-3: Missing trigger comment blocks exit for round > 0" - else - fail "T-HOOK-3: Should block when no trigger" "block" "allow" - fi -} - -# Test: Round 0 uses last_trigger_at when present, started_at as fallback -test_round0_trigger_priority() { - local current_round=0 - local started_at="2026-01-18T10:00:00Z" - local last_trigger_at="2026-01-18T11:00:00Z" - - # Simulate the timestamp selection from stop hook (updated logic) - # ALWAYS prefer last_trigger_at when available - local after_timestamp - if [[ -n "$last_trigger_at" ]]; then - after_timestamp="$last_trigger_at" - elif [[ "$current_round" -eq 0 ]]; then - after_timestamp="$started_at" - fi - - if [[ "$after_timestamp" == "$last_trigger_at" ]]; then - pass "T-HOOK-4: Round 0 uses last_trigger_at when present (not started_at)" - else - fail "T-HOOK-4: Round 0 should prefer last_trigger_at" "$last_trigger_at" "got $after_timestamp" - fi -} - -# Test: Round 0 falls back to started_at when no trigger -test_round0_started_at_fallback() { - local current_round=0 - local started_at="2026-01-18T10:00:00Z" - 
local last_trigger_at="" - - # Simulate the timestamp selection from stop hook - local after_timestamp - if [[ -n "$last_trigger_at" ]]; then - after_timestamp="$last_trigger_at" - elif [[ "$current_round" -eq 0 ]]; then - after_timestamp="$started_at" - fi - - if [[ "$after_timestamp" == "$started_at" ]]; then - pass "T-HOOK-4b: Round 0 falls back to started_at when no trigger" - else - fail "T-HOOK-4b: Round 0 should fall back to started_at" "$started_at" "got $after_timestamp" - fi -} - -# Test: Per-bot timeout anchored to trigger timestamp -test_timeout_anchored_to_trigger() { - # Simulate: trigger at T=0, poll starts at T=60, timeout is 900s - local trigger_epoch=1000 - local poll_start_epoch=1060 - local current_time=1900 # 900s after trigger, 840s after poll start - local timeout=900 - - # With trigger-anchored timeout: - local elapsed_from_trigger=$((current_time - trigger_epoch)) - # With poll-anchored timeout (wrong): - local elapsed_from_poll=$((current_time - poll_start_epoch)) - - local timed_out_trigger=false - local timed_out_poll=false - - if [[ $elapsed_from_trigger -ge $timeout ]]; then - timed_out_trigger=true - fi - if [[ $elapsed_from_poll -ge $timeout ]]; then - timed_out_poll=true - fi - - # Should be timed out based on trigger (900s elapsed), not poll (840s elapsed) - if [[ "$timed_out_trigger" == "true" && "$timed_out_poll" == "false" ]]; then - pass "T-HOOK-5: Per-bot timeout is anchored to trigger timestamp" - else - fail "T-HOOK-5: Timeout should be from trigger, not poll start" "trigger-based timeout" "poll-based timeout" - fi -} - -# Test: State file includes configured_bots -test_state_has_configured_bots() { - local test_subdir="$TEST_DIR/state_configured_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -configured_bots: - - claude - - codex -active_bots: - - claude -last_trigger_at: 2026-01-18T12:00:00Z ---- 
-EOF - - # Extract configured_bots count - local configured_count - configured_count=$(grep -c "^ - " "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" 2>/dev/null | head -1) - - if [[ "$configured_count" -ge 2 ]]; then - pass "T-HOOK-6: State file tracks configured_bots separately" - else - fail "T-HOOK-6: State should have configured_bots" "2+ bots" "got $configured_count" - fi -} - -# Test: Round file naming consistency -test_round_file_naming() { - # All round-N files should use NEXT_ROUND - local current_round=1 - local next_round=$((current_round + 1)) - - local comment_file="round-${next_round}-pr-comment.md" - local check_file="round-${next_round}-pr-check.md" - local feedback_file="round-${next_round}-pr-feedback.md" - - # All should use next_round (2) - if [[ "$comment_file" == "round-2-pr-comment.md" && \ - "$check_file" == "round-2-pr-check.md" && \ - "$feedback_file" == "round-2-pr-feedback.md" ]]; then - pass "T-HOOK-7: Round file naming is consistent (all use NEXT_ROUND)" - else - fail "T-HOOK-7: Round files should all use NEXT_ROUND" "round-2-*" "inconsistent" - fi -} - -# Run stop hook integration tests -test_trigger_user_filter -test_trigger_refresh -test_missing_trigger_blocks -test_round0_trigger_priority -test_round0_started_at_fallback -test_timeout_anchored_to_trigger -test_state_has_configured_bots -test_round_file_naming - -# ======================================== -# Stop Hook End-to-End Tests (Execute Hook with Mocked gh/codex) -# ======================================== - -echo "" -echo "========================================" -echo "Testing Stop Hook End-to-End Execution" -echo "========================================" -echo "" - -# Test: Stop hook blocks when no resolve file exists -test_e2e_missing_resolve_blocks() { - local test_subdir="$TEST_DIR/e2e_resolve_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > 
"$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T12:00:00Z -last_trigger_at: ---- -EOF - - # Create mock binaries - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo '{"login": "testuser"}' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook with mocked environment - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Check for block decision about missing resolve file - if echo "$hook_output" | grep -q "Resolution Summary Missing\|resolution summary\|round-0-pr-resolve"; then - pass "T-E2E-1: Stop hook blocks when resolve file missing" - else - fail "T-E2E-1: Stop hook should block for missing resolve" "block message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook detects trigger comment and updates state -test_e2e_trigger_detection() { - local test_subdir="$TEST_DIR/e2e_trigger_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with empty last_trigger_at - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude 
-active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T12:00:00Z -last_trigger_at: ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - # Create mock binaries that return trigger comment - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that properly returns jq-parsed user and trigger comments - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - # gh api user --jq '.login' returns just the login string - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - else - echo '{"login": "testuser"}' - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # When --jq and --paginate are used, gh applies jq per-element and outputs transformed objects - # The hook's jq: '.[] | {id: .id, author: .user.login, created_at: .created_at, body: .body}' - if [[ "$*" == *"--jq"* ]]; then - # Return pre-transformed format (what jq would output) - echo '{"id": 1, "author": "testuser", "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"}' - else - # Return raw GitHub API format - echo '[{"id": 1, "user": {"login": "testuser"}, "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Capture stderr for debug messages - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # 
Check for trigger detection message OR that last_trigger_at is being used - # (which indicates the trigger was detected and persisted) - if echo "$hook_stderr" | grep -q "Found trigger comment at:\|using trigger timestamp"; then - pass "T-E2E-2: Stop hook detects and reports trigger comment" - else - fail "T-E2E-2: Stop hook should detect trigger" "trigger detected" "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook handles paginated API response (multi-page trigger detection) -test_e2e_pagination_runtime() { - local test_subdir="$TEST_DIR/e2e_pagination_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that simulates paginated response (returns multiple JSON arrays) - # The trigger comment is on page 2 (second array) - only visible if pagination works - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - else - echo '{"login": "testuser"}' - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # When --jq and --paginate are used, gh applies jq per-element and outputs transformed objects - # Page 1: old comment without trigger - # Page 2: newer comment WITH trigger - must combine to find it - if [[ "$*" == *"--paginate"* ]] && [[ "$*" == *"--jq"* ]]; then - # --paginate with --jq: output transformed 
objects (one per line) - echo '{"id": 1, "author": "other", "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}' - echo '{"id": 2, "author": "testuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review the pagination fix"}' - elif [[ "$*" == *"--paginate"* ]]; then - # --paginate without --jq: output raw arrays - echo '[{"id": 1, "user": {"login": "other"}, "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}]' - echo '[{"id": 2, "user": {"login": "testuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review the pagination fix"}]' - else - # No pagination: only first page (trigger NOT found) - echo '[{"id": 1, "user": {"login": "other"}, "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Check that trigger was found (proving pagination worked to combine arrays) - if echo "$hook_stderr" | grep -q "Found trigger comment at:\|using trigger timestamp"; then - pass "T-E2E-3: Pagination combines arrays and finds trigger on page 2" - else - fail "T-E2E-3: Pagination should find trigger on page 2" "trigger detected" "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook uses last_trigger_at when present (even for round 0) -test_e2e_trigger_priority_runtime() { - local test_subdir="$TEST_DIR/e2e_priority_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with BOTH started_at and 
last_trigger_at set - # The trigger timestamp is LATER than started_at - if priority works, - # the hook should use the trigger timestamp (not started_at) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T14:30:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id": 1, "author": "testuser", "created_at": "2026-01-18T14:30:00Z", "body": "@claude review"}]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Check that it reports using trigger timestamp for --after (not started_at) - # Must match the SPECIFIC log format: "Round 0: using trigger timestamp for --after: <timestamp>" - # This proves last_trigger_at is prioritized even for round 0 - if echo "$hook_stderr" | grep -q "Round 0: using trigger timestamp for --after: 2026-01-18T14:30:00Z"; then - pass "T-E2E-4: 
Round 0 uses last_trigger_at for --after (not started_at)" - else - fail "T-E2E-4: Round 0 should use last_trigger_at for --after" \ - "Round 0: using trigger timestamp for --after: 2026-01-18T14:30:00Z" \ - "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Run end-to-end tests -test_e2e_missing_resolve_blocks -test_e2e_trigger_detection -test_e2e_pagination_runtime -test_e2e_trigger_priority_runtime - -# ======================================== -# Approval-Only Review Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Approval-Only Review Handling" -echo "========================================" -echo "" - -# Test: Empty-body PR reviews are captured with state placeholder -test_approval_only_review_captured() { - # Simulate PR review with APPROVED state but empty body - local reviews='[ - {"id": 1, "user": {"login": "claude[bot]"}, "state": "APPROVED", "body": null, "submitted_at": "2026-01-18T12:00:00Z"}, - {"id": 2, "user": {"login": "claude[bot]"}, "state": "APPROVED", "body": "", "submitted_at": "2026-01-18T12:01:00Z"}, - {"id": 3, "user": {"login": "claude[bot]"}, "state": "CHANGES_REQUESTED", "body": "Fix bug", "submitted_at": "2026-01-18T12:02:00Z"} - ]' - - # Apply the same jq logic as poll-pr-reviews.sh (fixed version) - local processed - processed=$(echo "$reviews" | jq '[.[] | { - id: .id, - author: .user.login, - state: .state, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end) - }]') - - local count - count=$(echo "$processed" | jq 'length') - - if [[ "$count" == "3" ]]; then - pass "T-APPROVE-1: Empty-body PR reviews are captured (count=3)" - else - fail "T-APPROVE-1: All reviews should be captured including empty-body" "3" "got $count" - fi - - # Check that empty body gets placeholder - local placeholder_count - placeholder_count=$(echo "$processed" | jq '[.[] | select(.body | test("\\[Review state:"))] | length') - - if [[ 
"$placeholder_count" == "2" ]]; then - pass "T-APPROVE-2: Empty-body reviews get state placeholder" - else - fail "T-APPROVE-2: Empty-body reviews should get placeholder" "2" "got $placeholder_count" - fi -} - -# Test: Approval-only reviews match bot patterns for polling -test_approval_polls_correctly() { - local bot_pattern="claude\\[bot\\]" - local reviews='[ - {"type": "pr_review", "author": "claude[bot]", "state": "APPROVED", "body": "[Review state: APPROVED]", "created_at": "2026-01-18T12:00:00Z"} - ]' - - local filtered - filtered=$(echo "$reviews" | jq --arg pattern "$bot_pattern" '[.[] | select(.author | test($pattern; "i"))]') - local count - count=$(echo "$filtered" | jq 'length') - - if [[ "$count" == "1" ]]; then - pass "T-APPROVE-3: Approval-only reviews match bot pattern for polling" - else - fail "T-APPROVE-3: Approval-only review should match bot" "1" "got $count" - fi -} - -# Run approval-only review tests -test_approval_only_review_captured -test_approval_polls_correctly - -# ======================================== -# Fixture-Backed Fetch/Poll Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Fetch/Poll with Fixture-Backed Mock GH" -echo "========================================" -echo "" - -# Set up fixture-backed mock gh -setup_fixture_mock_gh() { - local mock_bin_dir="$TEST_DIR/mock_bin" - local fixtures_dir="$SCRIPT_DIR/fixtures" - - # Create the mock gh - "$SCRIPT_DIR/setup-fixture-mock-gh.sh" "$mock_bin_dir" "$fixtures_dir" > /dev/null - - echo "$mock_bin_dir" -} - -# Test: fetch-pr-comments.sh returns all comment types including approval-only reviews -test_fetch_pr_comments_with_fixtures() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run fetch-pr-comments.sh with mock gh in PATH - local output_file="$TEST_DIR/pr-comments.md" - PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$output_file" - - 
local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-1: fetch-pr-comments.sh should succeed" "exit=0" "exit=$exit_code" - return - fi - - if [[ ! -f "$output_file" ]]; then - fail "T-FIXTURE-1: Output file should exist" "file exists" "file not found" - return - fi - - # Check for issue comments - if ! grep -q "humanuser" "$output_file"; then - fail "T-FIXTURE-1: Output should contain human issue comment" "humanuser comment" "not found" - return - fi - - # Check for review comments (inline code comments) - if ! grep -q "const instead of let" "$output_file"; then - fail "T-FIXTURE-1: Output should contain inline review comment" "const instead of let" "not found" - return - fi - - # Check for approval-only PR reviews with placeholder - if ! grep -q "\[Review state: APPROVED\]" "$output_file"; then - fail "T-FIXTURE-1: Output should contain approval-only review with placeholder" "[Review state: APPROVED]" "not found" - return - fi - - pass "T-FIXTURE-1: fetch-pr-comments.sh returns all comment types including approval-only" - cd "$SCRIPT_DIR" -} - -# Test: fetch-pr-comments.sh respects --after timestamp filter -test_fetch_pr_comments_after_filter() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run with --after filter (after 12:00, should exclude early comments) - local output_file="$TEST_DIR/pr-comments-filtered.md" - PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$output_file" --after "2026-01-18T12:00:00Z" - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-2: fetch-pr-comments.sh --after should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Should include late comments (13:00+ approvals) - if ! 
grep -q "\[Review state: APPROVED\]" "$output_file"; then - fail "T-FIXTURE-2: Should include late approval-only review" "[Review state: APPROVED]" "not found" - return - fi - - # Should NOT include early human comment from 09:00 - # (humanreviewer's "LGTM!" was at 09:00) - if grep -q "LGTM" "$output_file"; then - fail "T-FIXTURE-2: Should exclude comments before --after timestamp" "no LGTM" "LGTM found" - return - fi - - pass "T-FIXTURE-2: fetch-pr-comments.sh --after filter works correctly" - cd "$SCRIPT_DIR" -} - -# Test: poll-pr-reviews.sh returns JSON with approval-only reviews -test_poll_pr_reviews_with_fixtures() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run poll-pr-reviews.sh with mock gh in PATH - # Use early timestamp to catch all bot reviews - local output - output=$(PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 \ - --after "2026-01-18T10:00:00Z" \ - --bots "claude,codex") - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-3: poll-pr-reviews.sh should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Validate JSON structure - if ! echo "$output" | jq . > /dev/null 2>&1; then - fail "T-FIXTURE-3: Output should be valid JSON" "valid JSON" "invalid JSON" - return - fi - - # Check for approval-only reviews in comments - local has_placeholder - has_placeholder=$(echo "$output" | jq '[.comments[]? 
| select(.body | test("\\[Review state:"))] | length') - - if [[ "$has_placeholder" -lt 1 ]]; then - fail "T-FIXTURE-3: Should include approval-only reviews with placeholder" ">=1" "$has_placeholder" - return - fi - - # Check bots_responded includes both bots - local bots_count - bots_count=$(echo "$output" | jq '.bots_responded | length') - - if [[ "$bots_count" -lt 1 ]]; then - fail "T-FIXTURE-3: Should have bots in bots_responded" ">=1" "$bots_count" - return - fi - - pass "T-FIXTURE-3: poll-pr-reviews.sh returns approval-only reviews in JSON" - cd "$SCRIPT_DIR" -} - -# Test: poll-pr-reviews.sh filters by --after timestamp correctly -test_poll_pr_reviews_after_filter() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Use timestamp that filters out early CHANGES_REQUESTED (11:00) - # but includes late APPROVED reviews (13:00, 13:30) - local output - output=$(PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 \ - --after "2026-01-18T12:30:00Z" \ - --bots "claude,codex") - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-4: poll-pr-reviews.sh --after should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Should have claude[bot] approval at 13:00 and codex approval at 13:30 - local comment_count - comment_count=$(echo "$output" | jq '.comments | length') - - # At minimum, should have the late approvals - if [[ "$comment_count" -lt 1 ]]; then - fail "T-FIXTURE-4: Should include late approvals" ">=1" "$comment_count" - return - fi - - # Should NOT include the CHANGES_REQUESTED from 11:00 (before our --after) - local changes_requested - changes_requested=$(echo "$output" | jq '[.comments[]? 
| select(.body | test("security concerns"))] | length') - - if [[ "$changes_requested" -gt 0 ]]; then - fail "T-FIXTURE-4: Should exclude comments before --after" "0" "$changes_requested" - return - fi - - pass "T-FIXTURE-4: poll-pr-reviews.sh --after filter excludes early comments" - cd "$SCRIPT_DIR" -} - -# Run fixture-backed tests -test_fetch_pr_comments_with_fixtures -test_fetch_pr_comments_after_filter -test_poll_pr_reviews_with_fixtures -test_poll_pr_reviews_after_filter - -# ======================================== -# Wrong-Round Validation Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Wrong-Round Validation" -echo "========================================" -echo "" - -# Test: Wrong-round pr-resolve write is blocked -test_wrong_round_pr_resolve_blocked() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - # State says current_round is 2 - cat > "$loop_dir/state.md" << EOF ---- -current_round: 2 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Try to write to round-0 (wrong round) - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-resolve.md", "content": "wrong round"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "wrong round"; then - pass "T-ROUND-1: Wrong-round pr-resolve write is blocked" - else - fail "T-ROUND-1: Wrong-round pr-resolve should be blocked" "exit=2, wrong round" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: Correct-round pr-resolve write is allowed -test_correct_round_pr_resolve_allowed() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-01-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - # State says current_round is 2 - cat > "$loop_dir/state.md" << EOF ---- -current_round: 2 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Write to round-2 (correct round) - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-2-pr-resolve.md", "content": "correct round"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 0 ]]; then - pass "T-ROUND-2: Correct-round pr-resolve write is allowed" - else - fail "T-ROUND-2: Correct-round pr-resolve should be allowed" "exit=0" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: Wrong-round pr-resolve edit is blocked -test_wrong_round_pr_resolve_edit_blocked() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-02-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 3 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Try to edit round-1 (wrong round) - local hook_input='{"tool_name": "Edit", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-1-pr-resolve.md", "old_string": "x", "new_string": "y"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "wrong round"; then - pass "T-ROUND-3: Wrong-round pr-resolve edit is blocked" - else - fail "T-ROUND-3: Wrong-round pr-resolve edit should be blocked" "exit=2, wrong round" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Run wrong-round validation tests -test_wrong_round_pr_resolve_blocked -test_correct_round_pr_resolve_allowed -test_wrong_round_pr_resolve_edit_blocked - -# ======================================== -# Monitor PR Active Bots Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Monitor PR Active Bots Display" -echo "========================================" -echo "" - -# Test: Monitor parses YAML list for active_bots -test_monitor_yaml_list_parsing() { - local test_subdir="$TEST_DIR/monitor_yaml_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" yaml_list >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that active bots are displayed correctly (comma-separated) - if echo "$output" | grep -q "Active Bots:.*claude.*codex\|Active Bots:.*codex.*claude"; then - pass "T-MONITOR-1: Monitor parses and displays YAML list active_bots" - else - # Also accept claude,codex format - if echo "$output" | grep -q "Active Bots:.*claude,codex\|Active Bots:.*codex,claude"; then - pass "T-MONITOR-1: Monitor parses and displays YAML list active_bots" - else - fail "T-MONITOR-1: Monitor should display active bots from YAML list" "claude,codex" "got: $output" - fi - fi -} - -# Test: Monitor shows configured_bots separately -test_monitor_configured_bots() { - local 
test_subdir="$TEST_DIR/monitor_configured_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" configured >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that both configured and active bots are displayed - if echo "$output" | grep -q "Configured Bots:.*claude.*codex\|Configured Bots:.*codex.*claude\|Configured Bots:.*claude,codex\|Configured Bots:.*codex,claude"; then - pass "T-MONITOR-2: Monitor displays configured_bots" - else - fail "T-MONITOR-2: Monitor should display configured bots" "claude,codex" "got: $output" - fi -} - -# Test: Monitor shows 'none' when active_bots is empty -test_monitor_empty_active_bots() { - local test_subdir="$TEST_DIR/monitor_empty_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" empty >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that active bots shows 'none' - if echo "$output" | grep -q "Active Bots:.*none"; then - pass "T-MONITOR-3: Monitor shows 'none' for empty active_bots" - else - fail "T-MONITOR-3: Monitor should show 'none' for empty active_bots" "none" "got: $output" - fi -} - -# Run monitor tests -test_monitor_yaml_list_parsing -test_monitor_configured_bots -test_monitor_empty_active_bots - -} diff --git a/tests/test-pr-loop-lib.sh b/tests/test-pr-loop-lib.sh deleted file mode 100644 index 3d7693cb..00000000 --- a/tests/test-pr-loop-lib.sh +++ 
/dev/null @@ -1,145 +0,0 @@ -#!/bin/bash -# -# Common library for PR loop tests -# -# Provides shared setup, helpers, and mock functions used by all test modules. -# -# Usage: source test-pr-loop-lib.sh -# - -# Determine script location -if [[ -z "${TEST_PR_LOOP_LIB_LOADED:-}" ]]; then - TEST_PR_LOOP_LIB_LOADED=1 - - # Get directories if not already set - SCRIPT_DIR="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}" - PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}" - - # Source test helpers if not already sourced - if ! declare -f setup_test_dir &>/dev/null; then - source "$SCRIPT_DIR/test-helpers.sh" - fi - - # ======================================== - # Mock Creation Functions - # ======================================== - - # Create mock scripts for gh CLI - create_mock_gh() { - local mock_dir="$1" - mkdir -p "$mock_dir" - - cat > "$mock_dir/gh" << 'MOCK_GH' -#!/bin/bash -# Mock gh CLI for testing - -case "$1" in - auth) - if [[ "$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "$2" == "view" ]]; then - if [[ "$3" == "--json" && "$4" == "owner" ]]; then - echo '{"login": "testowner"}' - elif [[ "$3" == "--json" && "$4" == "name" ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "$2" == "view" ]]; then - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - elif [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - elif [[ "$3" == "--json" && "$4" == "number" ]]; then - echo '{"number": 123}' - exit 0 - elif [[ "$3" == "--json" && "$4" == "state" ]] || [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - exit 0 - fi - ;; - api) - # Handle user endpoint - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty arrays for comment/review fetching - echo "[]" - exit 0 - ;; -esac - 
-echo "Mock gh: unhandled command: $*" >&2 -exit 1 -MOCK_GH - chmod +x "$mock_dir/gh" - } - - # Create mock codex command - create_mock_codex() { - local mock_dir="$1" - - cat > "$mock_dir/codex" << 'MOCK_CODEX' -#!/bin/bash -# Mock codex CLI for testing -echo "Mock codex output" -exit 0 -MOCK_CODEX - chmod +x "$mock_dir/codex" - } - - # ======================================== - # Test Environment Setup - # ======================================== - - # Initialize test environment (call once at start of test run) - init_pr_loop_test_env() { - setup_test_dir - - # Create mock scripts directory and wire it into PATH - MOCK_BIN_DIR="$TEST_DIR/mock_bin" - mkdir -p "$MOCK_BIN_DIR" - export PATH="$MOCK_BIN_DIR:$PATH" - - # Initialize mock gh and codex in the PATH - create_mock_gh "$MOCK_BIN_DIR" - create_mock_codex "$MOCK_BIN_DIR" - - export MOCK_BIN_DIR - } - - # ======================================== - # Test Result Summary - # ======================================== - - # Print test summary and exit with appropriate code - print_test_summary() { - echo "" - echo "========================================" - echo "PR Loop Tests" - echo "========================================" - echo -e "Passed: \033[0;32m$TESTS_PASSED\033[0m" - echo -e "Failed: \033[0;31m$TESTS_FAILED\033[0m" - echo "" - - if [[ $TESTS_FAILED -gt 0 ]]; then - echo -e "\033[0;31mSome tests failed!\033[0m" - return 1 - else - echo -e "\033[0;32mAll tests passed!\033[0m" - return 0 - fi - } -fi diff --git a/tests/test-pr-loop-scripts.sh b/tests/test-pr-loop-scripts.sh deleted file mode 100644 index b1ce5a42..00000000 --- a/tests/test-pr-loop-scripts.sh +++ /dev/null @@ -1,410 +0,0 @@ -#!/bin/bash -# -# PR Loop Script Tests -# -# Tests for script argument parsing and validation: -# - setup-pr-loop.sh -# - cancel-pr-loop.sh -# - fetch-pr-comments.sh -# - poll-pr-reviews.sh -# -# Usage: source test-pr-loop-scripts.sh && run_script_tests -# - -# ======================================== -# 
setup-pr-loop.sh Tests -# ======================================== - -run_setup_tests() { - echo "" - echo "========================================" - echo "Testing setup-pr-loop.sh" - echo "========================================" - echo "" - - SETUP_SCRIPT="$PROJECT_ROOT/scripts/setup-pr-loop.sh" - - # Test: Help flag works - test_setup_help() { - local output - output=$("$SETUP_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "start-pr-loop"; then - pass "T-POS-1: --help displays usage information" - else - fail "T-POS-1: --help should display usage information" - fi - } - - # Test: Missing bot flag shows error - test_setup_no_bot_flag() { - local output - local exit_code - output=$("$SETUP_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "at least one bot flag"; then - pass "T-NEG-1: Missing bot flag shows error" - else - fail "T-NEG-1: Missing bot flag should show error" "exit code != 0 and error message" "exit=$exit_code, output=$output" - fi - } - - # Test: Invalid bot flag shows error - test_setup_invalid_bot() { - local output - local exit_code - output=$("$SETUP_SCRIPT" --invalid-bot 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "unknown option"; then - pass "T-NEG-2: Invalid bot flag shows error" - else - fail "T-NEG-2: Invalid bot flag should show error" "exit code != 0" "exit=$exit_code" - fi - } - - # Test: --claude flag is recognized - test_setup_claude_flag() { - # This will fail because no git repo, but we test that --claude is parsed - local output - output=$("$SETUP_SCRIPT" --claude 2>&1) || true - - # Should not complain about missing bot flag - if ! 
echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-2: --claude flag is recognized" - else - fail "T-POS-2: --claude flag should be recognized" - fi - } - - # Test: --codex flag is recognized - test_setup_codex_flag() { - local output - output=$("$SETUP_SCRIPT" --codex 2>&1) || true - - if ! echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-3: --codex flag is recognized" - else - fail "T-POS-3: --codex flag should be recognized" - fi - } - - # Test: Both bot flags work together - test_setup_both_bots() { - local output - output=$("$SETUP_SCRIPT" --claude --codex 2>&1) || true - - if ! echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-4: Both bot flags work together" - else - fail "T-POS-4: Both bot flags should work together" - fi - } - - # Test: --max argument is parsed - test_setup_max_arg() { - local output - output=$("$SETUP_SCRIPT" --claude --max 10 2>&1) || true - - # Should not complain about --max - if ! echo "$output" | grep -qi "max requires"; then - pass "T-POS-5: --max argument is parsed" - else - fail "T-POS-5: --max argument should be parsed" - fi - } - - # Test: --max with invalid value shows error - test_setup_max_invalid() { - local output - local exit_code - output=$("$SETUP_SCRIPT" --claude --max abc 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "must be.*integer"; then - pass "T-NEG-3: --max with invalid value shows error" - else - fail "T-NEG-3: --max with invalid value should show error" - fi - } - - # Test: --codex-model argument is parsed - test_setup_codex_model() { - local output - output=$("$SETUP_SCRIPT" --claude --codex-model gpt-4:high 2>&1) || true - - if ! 
echo "$output" | grep -qi "codex-model requires"; then - pass "T-POS-6: --codex-model argument is parsed" - else - fail "T-POS-6: --codex-model argument should be parsed" - fi - } - - # Test: --codex-timeout argument is parsed - test_setup_codex_timeout() { - local output - output=$("$SETUP_SCRIPT" --claude --codex-timeout 1800 2>&1) || true - - if ! echo "$output" | grep -qi "codex-timeout requires"; then - pass "T-POS-7: --codex-timeout argument is parsed" - else - fail "T-POS-7: --codex-timeout argument should be parsed" - fi - } - - # Run setup tests - test_setup_help - test_setup_no_bot_flag - test_setup_invalid_bot - test_setup_claude_flag - test_setup_codex_flag - test_setup_both_bots - test_setup_max_arg - test_setup_max_invalid - test_setup_codex_model - test_setup_codex_timeout -} - -# ======================================== -# cancel-pr-loop.sh Tests -# ======================================== - -run_cancel_tests() { - echo "" - echo "========================================" - echo "Testing cancel-pr-loop.sh" - echo "========================================" - echo "" - - CANCEL_SCRIPT="$PROJECT_ROOT/scripts/cancel-pr-loop.sh" - - # Test: Help flag works - test_cancel_help() { - local output - output=$("$CANCEL_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "cancel-pr-loop"; then - pass "T-POS-8: --help displays usage information" - else - fail "T-POS-8: --help should display usage information" - fi - } - - # Test: No loop returns NO_LOOP - test_cancel_no_loop() { - cd "$TEST_DIR" - # Export CLAUDE_PROJECT_DIR to ensure cancel script looks in test dir - export CLAUDE_PROJECT_DIR="$TEST_DIR" - local output - local exit_code - output=$("$CANCEL_SCRIPT" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - unset CLAUDE_PROJECT_DIR - - if [[ $exit_code -eq 1 ]] && echo "$output" | grep -q "NO_LOOP"; then - pass "T-NEG-4: No active loop returns NO_LOOP" - else - fail "T-NEG-4: No active loop should return NO_LOOP" "exit=1, NO_LOOP" "exit=$exit_code, output=$output" - fi - cd - > /dev/null - } - - # Test: Cancel works with active loop - test_cancel_active_loop() { - cd "$TEST_DIR" - # Export CLAUDE_PROJECT_DIR to ensure cancel script looks in test dir - export CLAUDE_PROJECT_DIR="$TEST_DIR" - - # Create mock loop directory - local timestamp="2026-01-18_12-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 ---- -EOF - - local output - local exit_code - output=$("$CANCEL_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - unset CLAUDE_PROJECT_DIR - - if [[ $exit_code -eq 0 ]] && echo "$output" | grep -q "CANCELLED"; then - if [[ -f "$loop_dir/cancel-state.md" ]] && [[ ! 
-f "$loop_dir/state.md" ]]; then - pass "T-POS-9: Cancel works and renames state file" - else - fail "T-POS-9: Cancel should rename state.md to cancel-state.md" - fi - else - fail "T-POS-9: Cancel should work with active loop" "exit=0, CANCELLED" "exit=$exit_code" - fi - - cd - > /dev/null - } - - # Run cancel tests - test_cancel_help - test_cancel_no_loop - test_cancel_active_loop -} - -# ======================================== -# fetch-pr-comments.sh Tests -# ======================================== - -run_fetch_tests() { - echo "" - echo "========================================" - echo "Testing fetch-pr-comments.sh" - echo "========================================" - echo "" - - FETCH_SCRIPT="$PROJECT_ROOT/scripts/fetch-pr-comments.sh" - - # Test: Help flag works - test_fetch_help() { - local output - output=$("$FETCH_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "fetch-pr-comments"; then - pass "T-POS-10: --help displays usage information" - else - fail "T-POS-10: --help should display usage information" - fi - } - - # Test: Missing PR number shows error - test_fetch_no_pr() { - local output - local exit_code - output=$("$FETCH_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "pr number.*required"; then - pass "T-NEG-5: Missing PR number shows error" - else - fail "T-NEG-5: Missing PR number should show error" - fi - } - - # Test: Missing output file shows error - test_fetch_no_output() { - local output - local exit_code - output=$("$FETCH_SCRIPT" 123 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "output file.*required"; then - pass "T-NEG-6: Missing output file shows error" - else - fail "T-NEG-6: Missing output file should show error" - fi - } - - # Test: Invalid PR number shows error - test_fetch_invalid_pr() { - local output - local exit_code - output=$("$FETCH_SCRIPT" abc /tmp/out.md 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "invalid pr number"; then - pass "T-NEG-7: Invalid PR number shows error" - else - fail "T-NEG-7: Invalid PR number should show error" - fi - } - - # Run fetch tests - test_fetch_help - test_fetch_no_pr - test_fetch_no_output - test_fetch_invalid_pr -} - -# ======================================== -# poll-pr-reviews.sh Tests -# ======================================== - -run_poll_tests() { - echo "" - echo "========================================" - echo "Testing poll-pr-reviews.sh" - echo "========================================" - echo "" - - POLL_SCRIPT="$PROJECT_ROOT/scripts/poll-pr-reviews.sh" - - # Test: Help flag works - test_poll_help() { - local output - output=$("$POLL_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "poll-pr-reviews"; then - pass "T-POS-11: --help displays usage information" - else - fail "T-POS-11: --help should display usage information" - fi - } - - # Test: Missing PR number shows error - test_poll_no_pr() { - local output - local exit_code - output=$("$POLL_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "pr number.*required"; then - pass "T-NEG-8: Missing PR number shows error" - else - fail "T-NEG-8: Missing PR number should show error" - fi - } - - # Test: Missing --after shows error - test_poll_no_after() { - local output - local exit_code - output=$("$POLL_SCRIPT" 123 --bots claude 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "after.*required"; then - pass "T-NEG-9: Missing --after shows error" - else - fail "T-NEG-9: Missing --after should show error" - fi - } - - # Test: Missing --bots shows error - test_poll_no_bots() { - local output - local exit_code - output=$("$POLL_SCRIPT" 123 --after 2026-01-18T00:00:00Z 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "bots.*required"; then - pass "T-NEG-10: Missing --bots shows error" - else - fail "T-NEG-10: Missing --bots should show error" - fi - } - - # Run poll tests - test_poll_help - test_poll_no_pr - test_poll_no_after - test_poll_no_bots -} - -# ======================================== -# Main Entry Point -# ======================================== - -run_script_tests() { - run_setup_tests - run_cancel_tests - run_fetch_tests - run_poll_tests -} diff --git a/tests/test-pr-loop-stophook.sh b/tests/test-pr-loop-stophook.sh deleted file mode 100644 index 1e71dcdf..00000000 --- a/tests/test-pr-loop-stophook.sh +++ /dev/null @@ -1,1782 +0,0 @@ -#!/bin/bash -# -# PR Loop Stop Hook Tests -# -# Tests for the stop hook functionality: -# - Force push detection -# - Trigger validation -# - Bot timeout handling -# - State file management -# - Dynamic startup_case updates -# -# Usage: source test-pr-loop-stophook.sh && run_stophook_tests -# - -run_stophook_tests() { -# ======================================== -# Stop-Hook Integration Tests -# ======================================== - -# Test: Force push trigger validation - old triggers rejected after force push -test_stophook_force_push_rejects_old_trigger() { - local test_subdir="$TEST_DIR/stophook_force_push_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with latest_commit_at set to AFTER the old trigger comment - # This simulates: force push happened after the old trigger was posted - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 4 
-latest_commit_sha: newsha123 -latest_commit_at: 2026-01-18T14:00:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns OLD trigger comment (BEFORE latest_commit_at) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -# Check if --jq is in arguments (for transformed format) -HAS_JQ=false -for arg in "$@"; do - if [[ "$arg" == "--jq" || "$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Return old trigger comment from 12:00 (BEFORE latest_commit_at of 14:00) - if [[ "$HAS_JQ" == "true" ]]; then - # With --jq --paginate, output one transformed object per line - echo '{"id": 1, "author": "testuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}' - else - # Raw GitHub API format - echo '[{"id": 1, "user": {"login": "testuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "newsha123" # Match state file - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; # Pretend no force push in this test -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook and capture output - local hook_output - hook_output=$(echo '{}' | 
"$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # The old trigger should be rejected because it's before latest_commit_at - # Stop hook should block requiring a new trigger - if echo "$hook_output" | grep -qi "trigger\|comment @\|re-trigger\|no trigger"; then - pass "T-STOPHOOK-1: Force push validation rejects old trigger comment" - else - fail "T-STOPHOOK-1: Should reject old trigger after force push" "block/require trigger" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 7 Case 1 exception - no trigger required for startup_case=1, round=0 -test_stophook_case1_no_trigger_required() { - local test_subdir="$TEST_DIR/stophook_case1_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with startup_case=1 and round=0 - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns no trigger comments, but has codex +1 - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/reactions"* ]]; then - # Return codex +1 reaction (triggers approval) - echo '[{"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T10:05:00Z"}]' - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[]' # No comments - exit 0 - fi - 
echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Case 1 exception: should NOT block for missing trigger - if echo "$hook_stderr" | grep -q "trigger not required\|Case 1\|startup_case=1"; then - pass "T-STOPHOOK-2: Case 1 exception - no trigger required" - else - # Alternative: check that it didn't block - if ! echo "$hook_stderr" | grep -qi "block.*trigger\|missing.*trigger\|comment @"; then - pass "T-STOPHOOK-2: Case 1 exception - no trigger required (no block)" - else - fail "T-STOPHOOK-2: Case 1 should not require trigger" "no block" "got: $hook_stderr" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 9 - APPROVE creates approve-state.md -test_stophook_approve_creates_state() { - local test_subdir="$TEST_DIR/stophook_approve_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with empty active_bots (YAML list format, no items) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T11:00:00Z -trigger_comment_id: 123 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - # Create resolve file 
(required by stop hook) - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - export CLAUDE_PROJECT_DIR="$test_subdir" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export PATH="$mock_bin:$PATH" - - # Run stop hook - with empty active_bots, it should approve - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Check for approve-state.md creation - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-3: APPROVE creates approve-state.md" - else - # Alternative: check output for approval message - if echo "$hook_output" | grep -qi "approved\|complete"; then - pass "T-STOPHOOK-3: APPROVE creates approve-state.md (via message)" - else - fail "T-STOPHOOK-3: Should create approve-state.md" "approve-state.md exists" "not found" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Dynamic startup_case update when new comments arrive -test_stophook_dynamic_startup_case() { - local test_subdir="$TEST_DIR/stophook_dynamic_case_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Start with startup_case=1 (no comments) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: 
-startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns bot comments (simulating comments arriving) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return bot comments (claude and codex have commented) - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id":1,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:05:00Z","body":"Found issue"},{"id":2,"user":{"login":"chatgpt-codex-connector[bot]"},"created_at":"2026-01-18T10:06:00Z","body":"Also found issue"}]' - exit 0 - fi - if [[ "$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - if [[ "$2" == *"/reactions"* ]]; then - echo '[]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T09:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T09:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with timeout (it may poll, so limit to 5 seconds) - timeout 5 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" >/dev/null 2>&1 || true - - # Check if 
startup_case was updated in state file - local new_case - new_case=$(grep "^startup_case:" "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" 2>/dev/null | sed 's/startup_case: *//' | tr -d ' ' || true) - - # With both bots commented and no new commits, should be Case 3 - if [[ "$new_case" == "3" ]]; then - pass "T-STOPHOOK-4: Dynamic startup_case updated to 3 (all commented, no new commits)" - elif [[ -n "$new_case" && "$new_case" != "1" ]]; then - pass "T-STOPHOOK-4: Dynamic startup_case updated from 1 to $new_case" - else - fail "T-STOPHOOK-4: startup_case should update dynamically" "case 3" "got: $new_case" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 6 - unpushed commits block exit -test_stophook_step6_unpushed_commits() { - local test_subdir="$TEST_DIR/stophook_step6_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - # Mock git that reports unpushed commits - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" # Clean working directory - elif [[ "$2" 
== "-sb" ]]; then - echo "## test-branch...origin/test-branch [ahead 2]" # 2 unpushed commits - fi - ;; - branch) - echo "test-branch" - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should block with unpushed commits message - if echo "$hook_output" | grep -qi "unpushed\|ahead\|push.*commit"; then - pass "T-STOPHOOK-5: Step 6 blocks on unpushed commits" - else - fail "T-STOPHOOK-5: Step 6 should block on unpushed commits" "unpushed/ahead message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 6.5 - force push detection with actual history rewrite simulation -test_stophook_step65_force_push_detection() { - local test_subdir="$TEST_DIR/stophook_step65_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with old commit SHA - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T10:30:00Z -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: oldsha123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo 
'{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - # Mock git that simulates force push: old commit is NOT ancestor of current HEAD - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "newsha456" # Different from oldsha123 in state - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) - # Simulate force push: old commit is NOT an ancestor - # --is-ancestor exits 1 when not ancestor - exit 1 - ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should detect force push and block - if echo "$hook_output" | grep -qi "force.*push\|history.*rewrite\|re-trigger"; then - pass "T-STOPHOOK-6: Step 6.5 detects force push (history rewrite)" - else - fail "T-STOPHOOK-6: Step 6.5 should detect force push" "force push message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 7 - missing trigger comment blocks (Case 4/5) -test_stophook_step7_missing_trigger() { - local test_subdir="$TEST_DIR/stophook_step7_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with startup_case=4 (requires trigger) but no trigger - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 
2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 4 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T12:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns no trigger comments - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[]' # No comments - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should block with missing trigger message - if echo "$hook_output" | grep -qi "trigger\|@.*mention\|comment"; then - pass "T-STOPHOOK-7: Step 7 blocks on missing trigger (Case 4)" - else - fail "T-STOPHOOK-7: Step 7 should block on missing trigger" "trigger/mention message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Bot timeout auto-removes bot 
from active_bots -test_stophook_bot_timeout_auto_remove() { - local test_subdir="$TEST_DIR/stophook_timeout_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with short poll_timeout (2 seconds) to test timeout behavior - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T10:30:00Z -trigger_comment_id: 999 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns NO bot comments (simulates bot not responding) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty for all comment/review queries - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT 
- chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with short timeout - it should time out and auto-remove bots - local hook_output - hook_output=$(timeout 10 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Should either mention timeout or create approve-state (if all bots timed out) - if echo "$hook_output" | grep -qi "timeout\|timed out\|auto-remove\|approved"; then - pass "T-STOPHOOK-8: Bot timeout handling" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-8: Bot timeout created approve-state.md" - else - fail "T-STOPHOOK-8: Bot timeout should trigger auto-remove" "timeout/approved message" "got: $hook_output" - fi - - # VERIFICATION: Check that active_bots was actually updated (removed the bot) - # After timeout, either: - # 1. approve-state.md exists with empty active_bots (all bots timed out) - # 2. state.md has the timed-out bot removed from active_bots - local state_file="" - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - fi - - # VERIFICATION: Check that approve-state.md was created with empty active_bots - local approve_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - if [[ -f "$approve_file" ]]; then - pass "T-STOPHOOK-8a: approve-state.md created - bot timeout led to loop completion" - # Verify active_bots is empty (not containing 'codex') - local active_bots_line - active_bots_line=$(grep "^active_bots:" "$approve_file" 2>/dev/null || true) - # After the line "active_bots:", check if there are any bot entries - local next_line_has_bot - next_line_has_bot=$(sed -n 
'/^active_bots:/,/^[a-z_]*:/p' "$approve_file" | grep -E '^\s*-\s*\w' || true) - if [[ -z "$next_line_has_bot" ]]; then - pass "T-STOPHOOK-8b: active_bots is empty after timeout" - else - fail "T-STOPHOOK-8b: active_bots should be empty after timeout" "no bots listed" "got: $next_line_has_bot" - fi - else - fail "T-STOPHOOK-8a: approve-state.md should exist after bot timeout" "approve-state.md exists" "file not found" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Codex +1 detection removes codex from active_bots -test_stophook_codex_thumbsup_approval() { - local test_subdir="$TEST_DIR/stophook_thumbsup_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with startup_case=1 (required for +1 check) and only codex as active bot - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns +1 reaction from codex - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return +1 reaction for PR reactions query - if [[ "$2" == *"/issues/"*"/reactions"* ]]; then - echo '[{"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T10:05:00Z"}]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 
0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should detect +1 and create approve-state.md (since codex is only bot) - if echo "$hook_output" | grep -qi "+1\|thumbsup\|approved"; then - pass "T-STOPHOOK-9: Codex +1 detection" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-9: Codex +1 created approve-state.md" - else - fail "T-STOPHOOK-9: Codex +1 should be detected" "+1/approved message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Claude eyes timeout blocks exit -test_stophook_claude_eyes_timeout() { - local test_subdir="$TEST_DIR/stophook_eyes_timeout_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with claude configured and trigger required (round > 0) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T11:00:00Z 
-trigger_comment_id: 12345 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns NO eyes reaction (simulates claude bot not configured) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -# Check if --jq is in arguments (for transformed format) -HAS_JQ=false -for arg in "$@"; do - if [[ "$arg" == "--jq" || "$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty reactions - no eyes - if [[ "$2" == *"/reactions"* ]]; then - echo "[]" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Return trigger comment - if [[ "$HAS_JQ" == "true" ]]; then - # With --jq --paginate, output one transformed object per line - echo '{"id": 12345, "author": "testuser", "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}' - else - # Raw GitHub API format - echo '[{"id": 12345, "user": {"login": "testuser"}, "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - 
echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run with timeout since eyes check has 3x5s retry (15s total) - local hook_output - hook_output=$(timeout 20 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Should block with eyes timeout message - if echo "$hook_output" | grep -qi "eyes\|not responding\|timeout\|bot.*configured"; then - pass "T-STOPHOOK-10: Claude eyes timeout blocks exit" - else - fail "T-STOPHOOK-10: Claude eyes timeout should block" "eyes/timeout message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Dynamic startup_case update when comments arrive -test_stophook_dynamic_startup_case_update() { - local test_subdir="$TEST_DIR/stophook_dynamic_case_test2" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps to ensure polling doesn't time out immediately - # Timeline: commit -> trigger -> comment (all recent, all within poll_timeout) - local trigger_ts commit_ts comment_ts - # Trigger was 10 seconds ago - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - # Commit was 60 seconds ago (before trigger) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - # Comment arrived 5 seconds ago (after trigger, after commit -> case 3) - comment_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - - # Start with startup_case=1 (no comments initially), then comments arrive - # Provide a trigger comment to proceed past timeout checks - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: 
gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns bot comments (simulating comments arriving) - # IMPORTANT: poll-pr-reviews.sh expects RAW GitHub API format (with .user.login) - # check-pr-reviewer-status.sh uses --jq so needs transformed format - # Use COMMENT_TS environment variable for dynamic timestamp - cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash -# Dynamic comment timestamp from test setup -COMMENT_TS="$comment_ts" -COMMIT_TS="$commit_ts" - -# Check if --jq is in arguments and what type of jq expression -HAS_JQ=false -JQ_RETURNS_ARRAY=false -ARGS=("\$@") -for ((i=0; i<\${#ARGS[@]}; i++)); do - if [[ "\${ARGS[i]}" == "--jq" || "\${ARGS[i]}" == "-q" ]]; then - HAS_JQ=true - # Check next argument for jq expression starting with [ - next_idx=\$((i + 1)) - if [[ \$next_idx -lt \${#ARGS[@]} ]]; then - next_arg="\${ARGS[next_idx]}" - if [[ "\$next_arg" == "["* ]]; then - JQ_RETURNS_ARRAY=true - fi - fi - fi -done - -case "\$1" in - repo) - # check-pr-reviewer-status.sh needs repo owner/name with jq transformation - if [[ "\$*" == *"--json owner,name"* ]] || [[ "\$*" == *"--json owner"* && "\$*" == *"--json name"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # jq '.owner.login + "/" + .name' returns "owner/repo" - echo "testowner/testrepo" - else - echo '{"owner": {"login": "testowner"}, "name": "testrepo"}' - fi - exit 0 - fi - if [[ "\$*" == *"--json parent"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # jq '.parent.owner.login + "/" + .parent.name' returns empty for non-fork - echo "" - else - echo '{"parent": null}' - fi - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" 
]]; then - echo "testuser" - exit 0 - fi - # Return codex comment - format depends on whether --jq is used and its pattern - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - if [[ "\$JQ_RETURNS_ARRAY" == "true" ]]; then - # check-pr-reviewer-status.sh uses '[.[] | {...}]' - returns array - echo "[{\"author\":\"chatgpt-codex-connector[bot]\",\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}]" - else - # stop hook uses '.[] | {...}' then 'jq -s' - returns individual objects - echo "{\"id\":1001,\"author\":\"chatgpt-codex-connector[bot]\",\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}" - fi - else - # Raw GitHub API format for poll-pr-reviews.sh - echo "[{\"id\":1001,\"user\":{\"login\":\"chatgpt-codex-connector[bot]\",\"type\":\"Bot\"},\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}]" - fi - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/reactions"* ]]; then - echo '[]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '{"number": 123}' - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"headRefOid"* ]]; then - # For check-pr-reviewer-status.sh: returns jq-processed format - # {sha: .headRefOid, date: (.commits | last | .committedDate)} - echo "{\"sha\":\"abc123\",\"date\":\"\$COMMIT_TS\"}" - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used (stop hook commit fetch) - echo "\$COMMIT_TS" - exit 0 - fi - if [[ "\$*" == *"commits"* ]]; then - # Commit before the comment - echo "{\"commits\":[{\"committedDate\":\"\$COMMIT_TS\"}]}" - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with timeout and capture output for debugging - local hook_output - hook_output=$(timeout 15 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" 2>&1) || true - - # Check if startup_case was updated in state file (or approve-state.md if all bots approved/timed out) - local new_case state_file - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - 
state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - else - state_file="" - fi - - if [[ -n "$state_file" ]]; then - new_case=$(grep "^startup_case:" "$state_file" 2>/dev/null | sed 's/startup_case: *//' | tr -d ' ' || true) - else - new_case="" - fi - - # Verify startup_case is present in the updated state file (confirms re-evaluation code path ran) - if [[ -n "$new_case" ]]; then - pass "T-STOPHOOK-11: Hook completes with startup_case in state" - else - fail "T-STOPHOOK-11: startup_case should be preserved in state" "startup_case present" "got: empty/missing" - fi - - # VERIFICATION: Assert startup_case changed from initial value (1) to expected value - # Mock setup: codex comment at 10:05:00Z, commit at 09:00:00Z (before comment) - # Expected: Case 3 (all reviewers commented, no new commits after) - if [[ -n "$new_case" && "$new_case" != "1" ]]; then - pass "T-STOPHOOK-11a: startup_case changed from 1 to $new_case" - elif [[ -n "$new_case" && "$new_case" == "1" ]]; then - # Debug: check if stop hook re-evaluated startup_case - if echo "$hook_output" | grep -qi "Startup case changed"; then - # Re-evaluation ran but case didn't change in state file - state write issue - fail "T-STOPHOOK-11a: startup_case changed in hook but not persisted" "!= 1" "case_change logged but state=1" - elif echo "$hook_output" | grep -qi "check-pr-reviewer-status\|NEW_REVIEWER_STATUS"; then - # Re-evaluation script was called - fail "T-STOPHOOK-11a: startup_case check ran but returned 1" "!= 1" "got: 1" - else - # Re-evaluation didn't run - likely exited early - local exit_reason - exit_reason=$(echo "$hook_output" | grep -i "exit\|block\|timeout" | head -3 || echo "unknown") - fail "T-STOPHOOK-11a: startup_case re-evaluation not reached" "!= 1" "got: 1, exit: $exit_reason" - fi - else - fail "T-STOPHOOK-11a: startup_case should be present and changed" "number != 1" "got: empty" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Fork PR support - stop hook 
resolves base repo from parent -test_stophook_fork_pr_base_repo_resolution() { - local test_subdir="$TEST_DIR/stophook_fork_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 456 -start_branch: test-branch -configured_bots: - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that simulates a fork scenario: - # - Current repo (fork) doesn't have PR 456 - # - Parent repo (upstream) has PR 456 - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash -# Track which repo we're querying -FORK_REPO="forkuser/forkrepo" -UPSTREAM_REPO="upstreamowner/upstreamrepo" - -case "$1" in - repo) - if [[ "$*" == *"--json owner,name"* ]]; then - # Current repo is the fork - echo "forkuser/forkrepo" - exit 0 - fi - if [[ "$*" == *"--json parent"* ]]; then - # Return parent (upstream) repo - echo "upstreamowner/upstreamrepo" - exit 0 - fi - ;; - pr) - # Check which --repo was specified - if [[ "$*" == *"--repo forkuser/forkrepo"* ]]; then - # Fork doesn't have PR 456 - return empty/error - exit 1 - fi - if [[ "$*" == *"--repo upstreamowner/upstreamrepo"* ]]; then - # Upstream has PR 456 - if [[ "$*" == *"number"* ]] && [[ "$*" != *"commits"* ]]; then - echo '{"number": 456}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - echo "2026-01-18T10:00:00Z" - exit 0 - fi - fi - # Default: try to handle without 
--repo (should fail for forks) - if [[ "$*" != *"--repo"* ]]; then - exit 1 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook - should resolve PR from parent repo - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should not fail with "PR not found" because it should have found it in parent repo - # And since active_bots is empty, it should approve - if echo "$hook_output" | grep -qi "approved\|complete"; then - pass "T-STOPHOOK-12: Fork PR support - resolved PR from parent repo" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-12: Fork PR support - created approve-state.md" - else - # Check if it at least didn't fail with "PR not found" - if ! 
echo "$hook_output" | grep -qi "pr.*not.*found\|no.*pull.*request"; then - pass "T-STOPHOOK-12: Fork PR support - did not fail on PR lookup" - else - fail "T-STOPHOOK-12: Fork PR should resolve from parent" "success" "got: $hook_output" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Goal tracker - resolved count stays 0 when some bots have issues -test_stophook_goal_tracker_mixed_approval() { - local test_subdir="$TEST_DIR/stophook_goal_tracker_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps to ensure polling doesn't time out immediately - # Timeline: commit -> trigger -> bot comments (all recent, within poll_timeout) - local trigger_ts commit_ts claude_ts codex_ts - # Trigger was 10 seconds ago - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - # Commit was 60 seconds ago (before trigger) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - # Claude comment arrived 5 seconds ago (after trigger) - claude_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - # Codex comment arrived 4 seconds ago (after trigger) - codex_ts=$(date -u -d "-4 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-4S +%Y-%m-%dT%H:%M:%SZ) - - # State with two bots configured - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - 
# Create initial goal tracker - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << EOF -# PR Loop Goal Tracker - -## Stats -- Issues Found: 0 -- Issues Resolved: 0 - -## Log -| Round | Timestamp | Event | -|-------|-----------|-------| -| 0 | $commit_ts | Loop started | -EOF - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns: - # - claude: APPROVE (LGTM) - # - codex: ISSUES (has issues) - cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash -# Dynamic timestamps from test setup -CLAUDE_TS="$claude_ts" -CODEX_TS="$codex_ts" -COMMIT_TS="$commit_ts" - -HAS_JQ=false -for arg in "\$@"; do - if [[ "\$arg" == "--jq" || "\$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "\$1" in - repo) - if [[ "\$*" == *"--json owner,name"* ]]; then - echo "testowner/testrepo" - exit 0 - fi - if [[ "\$*" == *"--json parent"* ]]; then - echo "" - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return comments from both bots - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # Claude approves, Codex has issues - echo "{\"id\": 1, \"author\": \"claude[bot]\", \"created_at\": \"\$CLAUDE_TS\", \"body\": \"LGTM! No issues found.\"}" - echo "{\"id\": 2, \"author\": \"chatgpt-codex-connector[bot]\", \"created_at\": \"\$CODEX_TS\", \"body\": \"Found 2 issues that need fixing.\"}" - else - echo "[{\"id\": 1, \"user\": {\"login\": \"claude[bot]\"}, \"created_at\": \"\$CLAUDE_TS\", \"body\": \"LGTM! No issues found.\"},{\"id\": 2, \"user\": {\"login\": \"chatgpt-codex-connector[bot]\"}, \"created_at\": \"\$CODEX_TS\", \"body\": \"Found 2 issues that need fixing.\"}]" - fi - exit 0 - fi - if [[ "\$2" == *"/reactions"* ]]; then - # Return eyes for claude (no need for this test but keep consistent) - echo "[]" - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '{"number": 123}' - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - echo "\$COMMIT_TS" - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Mock codex that outputs mixed approval - cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/bin/bash -# Mock codex output: claude approves, codex has issues -cat << 'CODEX_OUTPUT' -# PR Review Validation - -### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | No issues found | -| codex | ISSUES | Found 2 issues that need fixing | - -### Issues Found (if any) -1. Issue from codex: Missing error handling -2. 
Issue from codex: Needs tests - -### Approved Bots (to remove from active_bots) -- claude - -### Final Recommendation -ISSUES_REMAINING -CODEX_OUTPUT -MOCK_CODEX - chmod +x "$mock_bin/codex" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook - local hook_output - hook_output=$(timeout 30 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Verify that ISSUES_RESOLVED_COUNT is 0, not inflated to ISSUES_FOUND_COUNT - # The goal tracker should show issues found > 0 but resolved = 0 - # (because codex still has issues, even though claude approved) - - # Check the feedback file or check file for the correct issue counts - local check_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-check.md" - if [[ -f "$check_file" ]]; then - # Check that issues were found - if grep -q "Issues Found\|ISSUES" "$check_file" 2>/dev/null; then - pass "T-STOPHOOK-13: Goal tracker correctly identifies issues" - else - fail "T-STOPHOOK-13: Check file should contain issues" "issues listed" "not found" - fi - else - # Check file may not exist if polling didn't complete - # Check output instead - if echo "$hook_output" | grep -qi "issues.*remaining\|ISSUES_REMAINING"; then - pass "T-STOPHOOK-13: Goal tracker correctly identifies issues (via output)" - else - fail "T-STOPHOOK-13: Should detect issues remaining" "issues_remaining" "got: $hook_output" - fi - fi - - # VERIFICATION: The key fix - resolved count should NOT be inflated - # Since we can't directly check ISSUES_RESOLVED_COUNT variable, verify the behavior: - # - claude approved (removed from active_bots) - # - codex has issues (stays in active_bots) - # - loop should continue (not complete) because codex still has issues - - if [[ ! 
-f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-13a: Loop continues with mixed approval (not prematurely completed)" - else - fail "T-STOPHOOK-13a: Loop should not complete with mixed approval" "no approve-state.md" "approve-state.md exists" - fi - - # Check that claude was removed from active_bots but codex remains - local state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - if [[ -f "$state_file" ]]; then - local active_bots_content - active_bots_content=$(sed -n '/^active_bots:/,/^[a-z_]*:/p' "$state_file" | grep -E '^\s*-' || true) - - if echo "$active_bots_content" | grep -q "codex"; then - pass "T-STOPHOOK-13b: Codex remains in active_bots (has issues)" - else - fail "T-STOPHOOK-13b: Codex should remain in active_bots" "codex in list" "got: $active_bots_content" - fi - - if ! echo "$active_bots_content" | grep -q "claude"; then - pass "T-STOPHOOK-13c: Claude removed from active_bots (approved)" - else - fail "T-STOPHOOK-13c: Claude should be removed from active_bots" "no claude" "got: $active_bots_content" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Run stop-hook integration tests -test_stophook_force_push_rejects_old_trigger -test_stophook_case1_no_trigger_required -test_stophook_approve_creates_state -test_stophook_step6_unpushed_commits -test_stophook_step65_force_push_detection -test_stophook_step7_missing_trigger -test_stophook_bot_timeout_auto_remove -test_stophook_codex_thumbsup_approval -test_stophook_claude_eyes_timeout -test_stophook_dynamic_startup_case_update -test_stophook_fork_pr_base_repo_resolution -test_stophook_goal_tracker_mixed_approval - -} diff --git a/tests/test-pr-loop-system.sh b/tests/test-pr-loop-system.sh deleted file mode 100755 index e124cb6f..00000000 --- a/tests/test-pr-loop-system.sh +++ /dev/null @@ -1,1904 +0,0 @@ -#!/bin/bash -# -# Test runner for PR loop system -# -# Runs all tests in the tests/ directory using the mock gh CLI -# -# Usage: -# 
./tests/run-tests.sh [test-name] -# -# Environment: -# TEST_VERBOSE=1 - Show verbose output - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -# Test configuration -TESTS_DIR="$SCRIPT_DIR" -MOCKS_DIR="$TESTS_DIR/mocks" -FIXTURES_DIR="$TESTS_DIR/fixtures" -TEST_VERBOSE="${TEST_VERBOSE:-0}" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -NC='\033[0m' # No Color - -# Counters -TESTS_RUN=0 -TESTS_PASSED=0 -TESTS_FAILED=0 - -# Test helper functions -log_test() { - echo -e "${YELLOW}[TEST]${NC} $1" -} - -log_pass() { - echo -e "${GREEN}[PASS]${NC} $1" - TESTS_PASSED=$((TESTS_PASSED + 1)) -} - -log_fail() { - echo -e "${RED}[FAIL]${NC} $1" - TESTS_FAILED=$((TESTS_FAILED + 1)) -} - -# Setup test environment -setup_test_env() { - # Add mocks to PATH - export PATH="$MOCKS_DIR:$PATH" - export MOCK_GH_FIXTURES_DIR="$FIXTURES_DIR" - - # Create temp directory for tests - export TEST_TEMP_DIR=$(mktemp -d) - export CLAUDE_PROJECT_DIR="$TEST_TEMP_DIR" - - # Initialize git repo for tests - ( - cd "$TEST_TEMP_DIR" - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - git config commit.gpgsign false - echo "# Test" > README.md - git add README.md - git commit -q -m "Initial commit" - ) >/dev/null 2>&1 -} - -# Cleanup test environment -cleanup_test_env() { - if [[ -n "${TEST_TEMP_DIR:-}" && -d "$TEST_TEMP_DIR" ]]; then - rm -rf "$TEST_TEMP_DIR" - fi -} - -# Run a test function -run_test() { - local test_name="$1" - local test_func="$2" - - TESTS_RUN=$((TESTS_RUN + 1)) - log_test "$test_name" - - setup_test_env - - # Run test in subshell to isolate failures - local result=0 - ( - cd "$TEST_TEMP_DIR" - $test_func - ) && result=0 || result=$? 
- - if [[ $result -eq 0 ]]; then - log_pass "$test_name" - else - log_fail "$test_name (exit code: $result)" - fi - - cleanup_test_env -} - -# ======================================== -# Test: Mutual Exclusion -# ======================================== - -test_mutual_exclusion_rlcr_blocks_pr() { - # Create an active RLCR loop - mkdir -p .humanize/rlcr/2026-01-18_12-00-00 - echo "--- -current_round: 1 -max_iterations: 10 ----" > .humanize/rlcr/2026-01-18_12-00-00/state.md - - # Try to start a PR loop - should fail - export MOCK_GH_PR_NUMBER=123 - export MOCK_GH_PR_STATE="OPEN" - - local result - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --codex 2>&1) && return 1 || true - - # Should contain error about RLCR loop active - echo "$result" | grep -q "RLCR loop is already active" || return 1 -} - -test_mutual_exclusion_pr_blocks_rlcr() { - # Create an active PR loop - mkdir -p .humanize/pr-loop/2026-01-18_12-00-00 - echo "--- -current_round: 0 -max_iterations: 42 -pr_number: 123 ----" > .humanize/pr-loop/2026-01-18_12-00-00/state.md - - # Try to start an RLCR loop - should fail - echo "# Test Plan" > test-plan.md - - local result - result=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" test-plan.md 2>&1) && return 1 || true - - # Should contain error about PR loop active - echo "$result" | grep -q "PR loop is already active" || return 1 -} - -# ======================================== -# Test: Check PR Reviewer Status -# ======================================== - -test_reviewer_status_case1_no_comments() { - # Fixture with no bot comments - must clear ALL comment sources - echo "[]" > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo "[]" > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 1 - local test_passed=true - echo "$result" | jq -e '.case == 1' || test_passed=false - - # Restore fixtures - echo 
'[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - $test_passed -} - -test_reviewer_status_case2_partial_comments() { - # Only claude has commented - must clear codex comments too - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo "[]" > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 2 (partial) - local test_passed=true - echo "$result" | jq -e '.case == 2' || test_passed=false - echo "$result" | jq -e '.reviewers_missing | contains(["codex"])' || test_passed=false - - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! 
Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - $test_passed -} - -# ======================================== -# Test: Codex +1 Detection -# ======================================== - -test_codex_thumbsup_detected() { - local result - result=$("$PROJECT_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup 123) - - # Should find the +1 reaction - echo "$result" | jq -e '.content == "+1"' || return 1 -} - -test_codex_thumbsup_with_after_filter() { - # Test --after filter - reaction is at 11:10:00Z, we filter for after 12:00:00Z - # So no reaction should be found - local result - if "$PROJECT_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup 123 --after "2026-01-18T12:00:00Z" 2>/dev/null; then - # Should NOT succeed - reaction is before the filter time - return 1 - fi - # Correctly failed - reaction is before filter time - return 0 -} - -# ======================================== -# Test: Claude Eyes Detection -# ======================================== - -test_claude_eyes_detected() { - # Use delay 0 and retry 1 for fast test - local result - result=$("$PROJECT_ROOT/scripts/check-bot-reactions.sh" claude-eyes 12345 --retry 1 --delay 0) - - # Should find the eyes reaction - echo "$result" | jq -e '.content == "eyes"' || return 1 -} - -# ======================================== -# Test: PR Reviews Detection (PR submissions) -# ======================================== - -test_reviewer_status_includes_pr_reviews() { - # Set up fixture where codex has APPROVED via PR review (not comment) - echo "[]" > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! 
Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "codex") - - # Codex should be in reviewers_commented because of PR review - local test_passed=true - echo "$result" | jq -e '.reviewers_commented | contains(["codex"])' || test_passed=false - - $test_passed -} - -# ======================================== -# Test: Phase Detection -# ======================================== - -test_phase_detection_approved() { - # Source monitor-common.sh (located in scripts/lib/) - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with approve-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/approve-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "approved" ]] || return 1 -} - -test_phase_detection_waiting_initial() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with state.md at round 0 and startup_case 1 - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 0 -startup_case: 1 ---- -EOF - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "waiting_initial_review" ]] || return 1 -} - -test_phase_detection_waiting_reviewer() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with state.md at round 1 - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 ---- -EOF - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "waiting_reviewer" ]] || return 1 -} - -# ======================================== 
-# Test: Goal Tracker Parsing -# ======================================== - -test_goal_tracker_parsing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake goal tracker file - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# Goal Tracker - -### Ultimate Goal -Get all bots to approve the PR. - -### Acceptance Criteria - -| AC | Description | -|----|-------------| -| AC-1 | Bot claude approves | -| AC-2 | Bot codex approves | - -### Completed and Verified - -| AC | Description | -|----|-------------| -| AC-1 | Completed | - -#### Active Tasks - -| Task | Description | Status | -|------|-------------|--------| -| Fix bug | Fix the bug | pending | -| Add test | Add a test | completed | - -### Explicitly Deferred - -| Task | Description | -|------|-------------| - -### Open Issues - -| Issue | Description | -|-------|-------------| - -EOF - - local result - result=$(parse_goal_tracker "$tracker_file") - - # Should return: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary - # Expected: 2|1|1|0|0|0|Get all bots to approve the PR. 
- - local total_acs completed_acs active_tasks - IFS='|' read -r total_acs completed_acs active_tasks _ _ _ _ <<< "$result" - - [[ "$total_acs" == "2" ]] || { echo "Expected total_acs=2, got $total_acs"; return 1; } - [[ "$completed_acs" == "1" ]] || { echo "Expected completed_acs=1, got $completed_acs"; return 1; } - [[ "$active_tasks" == "1" ]] || { echo "Expected active_tasks=1, got $active_tasks"; return 1; } -} - -# ======================================== -# Test: PR Goal Tracker Parsing -# ======================================== - -test_pr_goal_tracker_parsing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake PR goal tracker file - local tracker_file="$TEST_TEMP_DIR/pr-goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Goal Tracker - -## Total Statistics - -- Total Issues Found: 5 -- Total Issues Resolved: 3 -- Remaining: 2 - -## Issue Summary - -| ID | Reviewer | Round | Status | Description | -|----|----------|-------|--------|-------------| -| 1 | Claude | 0 | resolved | Issue one | -| 2 | Claude | 0 | resolved | Issue two | -| 3 | Codex | 1 | open | Issue three | -| 4 | Codex | 1 | resolved | Issue four | -| 5 | Claude | 2 | open | Issue five | - -EOF - - local result - result=$(humanize_parse_pr_goal_tracker "$tracker_file") - - # Should return: total_issues|resolved_issues|remaining_issues|last_reviewer - # Expected: 5|3|2|Claude - - local total_issues resolved_issues remaining_issues last_reviewer - IFS='|' read -r total_issues resolved_issues remaining_issues last_reviewer <<< "$result" - - [[ "$total_issues" == "5" ]] || { echo "Expected total_issues=5, got $total_issues"; return 1; } - [[ "$resolved_issues" == "3" ]] || { echo "Expected resolved_issues=3, got $resolved_issues"; return 1; } - [[ "$remaining_issues" == "2" ]] || { echo "Expected remaining_issues=2, got $remaining_issues"; return 1; } - [[ "$last_reviewer" == "Claude" ]] || { echo "Expected last_reviewer=Claude, got 
$last_reviewer"; return 1; } -} - -# ======================================== -# Test: State File Detection -# ======================================== - -test_state_file_detection_active() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create active state - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "current_round: 0" > "$session_dir/state.md" - - local result - result=$(monitor_find_state_file "$session_dir") - - # Should return state.md with active status - echo "$result" | grep -q "state.md|active" || { echo "Expected active state, got $result"; return 1; } -} - -test_state_file_detection_approve() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create approve state (no state.md, only approve-state.md) - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "approved" > "$session_dir/approve-state.md" - - local result - result=$(monitor_find_state_file "$session_dir") - - # Should return approve-state.md with approve status - echo "$result" | grep -q "approve-state.md|approve" || { echo "Expected approve state, got $result"; return 1; } -} - -# ======================================== -# Test: Phase Detection - Cancelled -# ======================================== - -test_phase_detection_cancelled() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with cancel-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/cancel-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "cancelled" ]] || { echo "Expected cancelled, got $phase"; return 1; } -} - -test_phase_detection_maxiter() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake 
session dir with maxiter-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/maxiter-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "maxiter" ]] || { echo "Expected maxiter, got $phase"; return 1; } -} - -# ======================================== -# Test: Startup Case Detection -# ======================================== - -test_reviewer_status_case3_all_commented() { - # All bots have commented - should be case 3 - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 3 (all bots commented) - local test_passed=true - echo "$result" | jq -e '.case == 3' || test_passed=false - - $test_passed -} - -# ======================================== -# Test: update_pr_goal_tracker helper -# ======================================== - -test_update_pr_goal_tracker() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Goal Tracker - -## Total Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 1 -- Remaining: 1 - -## Issue Summary -EOF - - # Update with new bot results (JSON format: issues=new found, resolved=new resolved) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 3, "resolved": 2, "bot": "Codex"}' - - # Verify update - should add 3 found, 2 resolved (new totals: 5 found, 3 resolved, 2 remaining) - grep -q "Total Issues Found: 5" 
"$tracker_file" || { echo "Expected 5 total found"; return 1; } - grep -q "Total Issues Resolved: 3" "$tracker_file" || { echo "Expected 3 total resolved"; return 1; } - grep -q "Remaining: 2" "$tracker_file" || { echo "Expected 2 remaining"; return 1; } -} - -# ======================================== -# Test: Unpushed Commits Detection -# ======================================== - -test_unpushed_commits_detected() { - # Create a git repo with unpushed commits - local test_dir="$TEST_TEMP_DIR" - cd "$test_dir" - - # Initialize git repo and create a commit - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - echo "# Test" > README.md - git add README.md - git commit -q -m "Initial commit" - - # Create a fake remote tracking branch (simulates having unpushed commits) - # This creates a local branch that pretends to track origin/main - git branch --set-upstream-to=HEAD 2>/dev/null || true - - # Add another commit (this will be "unpushed") - echo "new content" >> README.md - git add README.md - git commit -q -m "New commit" - - # Check git status for unpushed detection pattern - local ahead_count=$(git status -sb 2>/dev/null | grep -oE '\[ahead [0-9]+\]' | grep -oE '[0-9]+' || echo "0") - - # Test passes if we can detect we have local commits - # Note: In this test setup, we can't truly simulate upstream, so we verify the pattern matching works - [[ -n "$(git log --oneline -1)" ]] || return 1 -} - -# ======================================== -# Test: Force Push Detection Logic -# ======================================== - -test_force_push_ancestry_check() { - # Test git merge-base --is-ancestor behavior - local test_dir="$TEST_TEMP_DIR" - cd "$test_dir" - - # Create a git repo with two branches - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - - # Create initial commit - echo "v1" > file.txt - git add file.txt - git commit -q -m "Initial" - local INITIAL_SHA=$(git rev-parse HEAD) - - # 
Create second commit - echo "v2" >> file.txt - git add file.txt - git commit -q -m "Second" - local SECOND_SHA=$(git rev-parse HEAD) - - # Test: INITIAL_SHA should be ancestor of SECOND_SHA - git merge-base --is-ancestor "$INITIAL_SHA" "$SECOND_SHA" || { echo "Expected $INITIAL_SHA to be ancestor of $SECOND_SHA"; return 1; } - - # Test: SECOND_SHA should NOT be ancestor of INITIAL_SHA - if git merge-base --is-ancestor "$SECOND_SHA" "$INITIAL_SHA" 2>/dev/null; then - echo "Expected $SECOND_SHA to NOT be ancestor of $INITIAL_SHA" - return 1 - fi - - return 0 -} - -# ======================================== -# Test: Approve State Creation -# ======================================== - -test_approve_state_detection() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create session dir with approve-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "approved" > "$session_dir/approve-state.md" - - # Phase should be "approved" - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "approved" ]] || { echo "Expected phase=approved, got $phase"; return 1; } - - # State file detection should also work - local state_info - state_info=$(monitor_find_state_file "$session_dir") - - echo "$state_info" | grep -q "approve" || { echo "Expected approve in state_info, got $state_info"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Schema -# ======================================== - -test_goal_tracker_schema() { - # Read the goal tracker init template - local template_file="$PROJECT_ROOT/prompt-template/pr-loop/goal-tracker-initial.md" - - # Verify required sections exist per plan - grep -q "## Issue Summary" "$template_file" || { echo "Missing Issue Summary section"; return 1; } - grep -q "## Total Statistics" "$template_file" || { echo "Missing Total Statistics section"; return 1; } - grep -q "## Issue Log" 
"$template_file" || { echo "Missing Issue Log section"; return 1; } - - # Verify Total Statistics has required fields - grep -q "Total Issues Found:" "$template_file" || { echo "Missing Total Issues Found field"; return 1; } - grep -q "Total Issues Resolved:" "$template_file" || { echo "Missing Total Issues Resolved field"; return 1; } - grep -q "Remaining:" "$template_file" || { echo "Missing Remaining field"; return 1; } -} - -# ======================================== -# Test: Dynamic Startup Case -# ======================================== - -test_startup_case_4_5_detection() { - # Test that check-pr-reviewer-status.sh detects case 4/5 (commits after reviews) - # Set up fixtures: both bots commented, but there's a newer commit - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T10:15:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - # Note: The mock would need to simulate a newer commit timestamp - # For this test, we verify the script returns valid JSON - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return valid JSON with case field - echo "$result" | jq -e '.case' >/dev/null || { echo "Invalid JSON or missing case field"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Update with Issue Summary Row -# ======================================== - -test_goal_tracker_update_adds_row() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file with proper schema - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues 
Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update with new bot results - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Verify Issue Log has Round 1 entry - grep -q "### Round 1" "$tracker_file" || { echo "Missing Round 1 in Issue Log"; return 1; } - - # Verify totals updated - grep -q "Total Issues Found: 2" "$tracker_file" || { echo "Expected 2 total found"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Update Idempotency -# ======================================== - -test_goal_tracker_update_idempotent() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file with proper schema - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # First update - should succeed - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 3, "resolved": 0, "bot": "Codex"}' - - # Verify first update worked - grep -q "Total Issues Found: 3" "$tracker_file" || { echo "First update failed - expected 3 total found"; return 1; } - - # Second update with SAME round AND SAME bot - should be SKIPPED (idempotent) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 5, "resolved": 0, "bot": "Codex"}' - - # Totals should still be 3 (not 8) because round 1 was already recorded - grep -q "Total Issues Found: 3" 
"$tracker_file" || { echo "Idempotency failed - totals changed on duplicate update"; return 1; } - - # Count Issue Summary rows - should only have 2 (Round 0 + Round 1) - local row_count=$(grep -cE '^\|[[:space:]]*[0-9]+[[:space:]]*\|' "$tracker_file") - [[ "$row_count" -eq 2 ]] || { echo "Idempotency failed - expected 2 rows, got $row_count"; return 1; } -} - -# ======================================== -# Test: Shared Monitor - Find Latest Session -# ======================================== - -test_shared_monitor_find_latest_session() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create session directories with different timestamps - local loop_dir="$TEST_TEMP_DIR/.humanize/pr-loop" - mkdir -p "$loop_dir/2026-01-18_10-00-00" - mkdir -p "$loop_dir/2026-01-18_12-00-00" - mkdir -p "$loop_dir/2026-01-18_11-00-00" - - # Test that the latest session is found - local result - result=$(monitor_find_latest_session "$loop_dir") - - [[ "$(basename "$result")" == "2026-01-18_12-00-00" ]] || { - echo "Expected 2026-01-18_12-00-00, got $(basename "$result")" - return 1 - } -} - -# ======================================== -# Test: Shared Monitor - Find State File -# ======================================== - -test_shared_monitor_find_state_file() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - local session_dir="$TEST_TEMP_DIR/session" - mkdir -p "$session_dir" - - # Test 1: active state - touch "$session_dir/state.md" - local result - result=$(monitor_find_state_file "$session_dir") - local status="${result#*|}" - [[ "$status" == "active" ]] || { echo "Expected active, got $status"; return 1; } - - # Test 2: approve state (remove state.md, add approve-state.md) - rm "$session_dir/state.md" - touch "$session_dir/approve-state.md" - result=$(monitor_find_state_file "$session_dir") - status="${result#*|}" - [[ "$status" == "approve" ]] || { echo "Expected approve, got $status"; return 1; } - - 
# Test 3: no state file - rm "$session_dir/approve-state.md" - result=$(monitor_find_state_file "$session_dir") - status="${result#*|}" - [[ "$status" == "unknown" ]] || { echo "Expected unknown, got $status"; return 1; } -} - -# ======================================== -# Test: Shared Monitor - Get File Size -# ======================================== - -test_shared_monitor_get_file_size() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a test file with known content - local test_file="$TEST_TEMP_DIR/test-file.txt" - echo "Hello World" > "$test_file" - - local result - result=$(monitor_get_file_size "$test_file") - - # File should have content (size > 0) - [[ "$result" -gt 0 ]] || { echo "Expected size > 0, got $result"; return 1; } - - # Test non-existent file returns 0 - result=$(monitor_get_file_size "$TEST_TEMP_DIR/nonexistent.txt") - [[ "$result" -eq 0 ]] || { echo "Expected 0 for nonexistent file, got $result"; return 1; } -} - -# ======================================== -# Test: Phase Detection - Codex Analyzing (File Growth) -# ======================================== - -test_phase_detection_codex_analyzing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - local session_dir="$TEST_TEMP_DIR/session" - mkdir -p "$session_dir" - - # Create state.md for active session - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 ---- -EOF - - # Create a pr-check file with recent mtime (simulates Codex writing) - local check_file="$session_dir/round-1-pr-check.md" - echo "Analyzing PR..." 
> "$check_file" - # Touch with current time ensures mtime is within 10 seconds - touch "$check_file" - - # Test phase detection shows codex_analyzing - local result - result=$(get_pr_loop_phase "$session_dir") - [[ "$result" == "codex_analyzing" ]] || { - echo "Expected codex_analyzing, got $result" - return 1 - } - - # For the second test: make the file old and ensure cache shows no growth - # Touch with past timestamp - touch -d "2026-01-18 10:00:00" "$check_file" - - # Get the current file size and write it to cache twice - # (so second call sees no growth) - local size - size=$(stat -c%s "$check_file" 2>/dev/null || stat -f%z "$check_file" 2>/dev/null || echo 0) - local session_name=$(basename "$session_dir") - local cache_file="/tmp/humanize-phase-${session_name}-1.size" - echo "$size" > "$cache_file" - - # Now call again - same size, old mtime -> should be waiting_reviewer - result=$(get_pr_loop_phase "$session_dir") - [[ "$result" == "waiting_reviewer" ]] || { - echo "Expected waiting_reviewer after old mtime and no growth, got $result" - return 1 - } - - # Cleanup - rm -f "$cache_file" 2>/dev/null || true -} - -# ======================================== -# Test: Monitor Phase Display Output Assertions -# ======================================== - -# Helper: Run monitor with --once and capture output -run_monitor_once_capture_output() { - local session_dir="$1" - local project_dir="$2" - - # Create wrapper script that runs monitor and captures output - local wrapper="$project_dir/run_monitor_test.sh" - cat > "$wrapper" << 'WRAPPER_EOF' -#!/bin/bash -PROJECT_DIR="$1" -PROJECT_ROOT="$2" - -cd "$PROJECT_DIR" - -# Stub terminal commands for non-interactive mode -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} -export -f tput -clear() { :; } -export -f clear - -# Disable ANSI colors for easier parsing -export NO_COLOR=1 - -# Source humanize.sh -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Run monitor with --once flag 
-humanize monitor pr --once 2>&1 -WRAPPER_EOF - chmod +x "$wrapper" - - # Run and capture output - timeout 10 bash "$wrapper" "$project_dir" "$PROJECT_ROOT" 2>&1 || true -} - -# Test: Monitor displays "All reviews approved" for approved state -test_monitor_output_phase_approved() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_approved" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create approve-state.md (final approved state) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/approve-state.md" << 'EOF' ---- -current_round: 1 -startup_case: 3 -pr_number: 123 -configured_bots: - - codex -active_bots: ---- -EOF - - # Create goal-tracker.md (required by monitor) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains approved phase (require Phase: label) - if echo "$output" | grep -qi "Phase:.*approved\|Phase:.*All reviews"; then - return 0 - else - echo "Expected 'All reviews approved' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Waiting for initial PR review" for waiting_initial_review state -test_monitor_output_phase_waiting_initial() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_waiting" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create state.md with startup_case=1, round=0 (waiting for initial review) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/state.md" << 'EOF' ---- -current_round: 0 -startup_case: 1 -pr_number: 123 -configured_bots: - - codex - - claude -active_bots: - - codex - - claude ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue 
Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains waiting phase (require Phase: label) - # For startup_case=1 (no comments yet), the loop is waiting for initial review - if echo "$output" | grep -qi "Phase:.*waiting"; then - return 0 - else - echo "Expected 'Phase:...waiting' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Loop cancelled" for cancelled state -test_monitor_output_phase_cancelled() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_cancelled" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create cancel-state.md (cancelled state) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/cancel-state.md" << 'EOF' ---- -current_round: 1 -startup_case: 3 -pr_number: 123 -configured_bots: - - codex -active_bots: - - codex -cancelled_at: 2026-01-18T12:00:00Z ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains cancel phase (require Phase: label) - if echo "$output" | grep -qi "Phase:.*cancel"; then - return 0 - else - echo "Expected 'Phase:...cancel' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Codex analyzing..." 
for codex_analyzing phase -test_monitor_output_phase_codex_analyzing() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_analyzing" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create state.md for active session - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 -pr_number: 123 -configured_bots: - - codex -active_bots: - - codex ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - # Create a pr-check file with current mtime (simulates Codex actively writing) - local check_file="$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/round-1-pr-check.md" - echo "Analyzing PR..." > "$check_file" - # Touch with current time ensures mtime is within 10 seconds - touch "$check_file" - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains "Codex analyzing" phase (require Phase: prefix) - if echo "$output" | grep -qi "Phase:.*Codex.*analyz"; then - return 0 - else - echo "Expected 'Phase:...Codex analyzing' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# ======================================== -# Test: Case 1 Exception - No Trigger Required -# ======================================== - -test_case1_exception_no_trigger() { - # For startup_case 1/2/3 in round 0, no trigger is required - # This tests the logic that determines REQUIRE_TRIGGER - - # Test startup_case 1, round 0 -> REQUIRE_TRIGGER=false - local round=0 - local startup_case=1 - local require_trigger=false - - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ 
"$require_trigger" == "false" ]] || { echo "Case 1 should not require trigger"; return 1; } - - # Test startup_case 2, round 0 -> REQUIRE_TRIGGER=false - startup_case=2 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "false" ]] || { echo "Case 2 should not require trigger"; return 1; } - - # Test startup_case 4, round 0 -> REQUIRE_TRIGGER=true - startup_case=4 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "true" ]] || { echo "Case 4 should require trigger"; return 1; } - - # Test round 1 (any case) -> REQUIRE_TRIGGER=true - round=1 - startup_case=1 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "true" ]] || { echo "Round 1 should require trigger"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Row Inside Table -# ======================================== - -test_goal_tracker_row_inside_table() { - # Verify that update_pr_goal_tracker inserts rows INSIDE the Issue Summary table - # Not before "## Total Statistics" - - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - local tracker_file="$TEST_TEMP_DIR/goal-tracker-table.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 
0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update with round 1 - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Verify: The new row should be BEFORE the blank line that ends the table - # Check that there's a table row with Round 1 BEFORE "## Total Statistics" - - # Extract just the Issue Summary section - local summary_section - summary_section=$(sed -n '/^## Issue Summary/,/^## Total Statistics/p' "$tracker_file") - - # The section should contain | 1 | somewhere (Round 1 row) - echo "$summary_section" | grep -qE '^\|[[:space:]]*1[[:space:]]*\|' || { - echo "Round 1 row not found in Issue Summary table" - echo "Content:" - cat "$tracker_file" - return 1 - } - - # Verify the row appears BEFORE "## Total Statistics" (already ensured by sed range) - # and the table structure is valid (rows end before blank line before ## Total Statistics) - - # Count table rows in Issue Summary (should be 3: header, separator, round 0, round 1) - local row_count - row_count=$(echo "$summary_section" | grep -cE '^\|' || echo 0) - [[ "$row_count" -ge 4 ]] || { - echo "Expected at least 4 table rows (header + separator + 2 data rows), got $row_count" - return 1 - } -} - -# ======================================== -# Test: Goal Tracker Partial Update Repair -# ======================================== - -test_goal_tracker_partial_update_repair() { - # Verify that update_pr_goal_tracker repairs partial updates - # (when only summary OR log exists, not both) - - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Test 1: Tracker with summary row but NO log entry - local tracker_file="$TEST_TEMP_DIR/goal-tracker-partial1.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | -| 1 | Codex | 2 | 0 | Issues Found | - -## Total 
Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 0 -- Remaining: 2 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update - should add log entry but not summary row (since summary exists) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Should now have Round 1 in Issue Log - grep -q "### Round 1" "$tracker_file" || { echo "Log entry for Round 1 not added"; return 1; } - - # Test 2: Tracker with log entry but NO summary row - local tracker_file2="$TEST_TEMP_DIR/goal-tracker-partial2.md" - cat > "$tracker_file2" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -### Round 1 -Codex: Found 2 issues, Resolved 0 -EOF - - # Update - should add summary row but not log entry (since log exists) - update_pr_goal_tracker "$tracker_file2" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Should now have Round 1 in summary table - grep -qE '^\|[[:space:]]*1[[:space:]]*\|' "$tracker_file2" || { echo "Summary row for Round 1 not added"; return 1; } -} - -# ======================================== -# Test: Case 4 Emission (all commented + new commits) -# ======================================== - -test_case4_all_commented_new_commits() { - # Verify Case 4 is emitted when ALL reviewers commented and new commits after - - # Fixture: All bots commented at 10:00, latest commit at 11:00 - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo 
'[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T10:05:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - # Mock commit at 11:00 (after reviews) - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T11:00:00Z" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return Case 4 (all commented, new commits) - local case_num - case_num=$(echo "$result" | jq -r '.case') - [[ "$case_num" == "4" ]] || { echo "Expected Case 4, got $case_num"; return 1; } - - # has_commits_after_reviews should be true - local has_commits - has_commits=$(echo "$result" | jq -r '.has_commits_after_reviews') - [[ "$has_commits" == "true" ]] || { echo "Expected has_commits_after_reviews=true, got $has_commits"; return 1; } - - # Cleanup mock - unset MOCK_GH_LATEST_COMMIT_AT -} - -# ======================================== -# Test: Case 5 Emission (partial + new commits) -# ======================================== - -test_case5_partial_commented_new_commits() { - # Verify Case 5 is emitted when SOME reviewers commented and new commits after - - # Fixture: Only claude commented at 10:00, codex missing - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" # No codex - - # Mock commit at 11:00 (after claude's review) - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T11:00:00Z" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return Case 5 (partial commented, new commits) - local case_num - case_num=$(echo "$result" | jq -r '.case') - [[ "$case_num" == "5" ]] || { echo "Expected Case 5, got $case_num"; return 1; } - - # has_commits_after_reviews should be true - local has_commits - 
has_commits=$(echo "$result" | jq -r '.has_commits_after_reviews') - [[ "$has_commits" == "true" ]] || { echo "Expected has_commits_after_reviews=true, got $has_commits"; return 1; } - - # Cleanup mock - unset MOCK_GH_LATEST_COMMIT_AT - - # Restore original fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" -} - -# ======================================== -# Test: Setup Case 4/5 Failure Path (missing trigger_comment_id) -# ======================================== - -test_setup_case45_missing_trigger_comment_id() { - # Test that setup-pr-loop.sh fails when trigger_comment_id cannot be retrieved - # for Case 4/5 with --claude option - # This tests the fix that requires eyes verification - - # Set up fixtures for Case 4: All bots commented, new commits after reviews - # Only claude for simplicity - fixture needs bot comment BEFORE latest commit - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T08:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" - - # Set latest commit AFTER bot comments to trigger Case 4 - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T12:00:00Z" - export MOCK_GH_PR_NUMBER=123 - export MOCK_GH_PR_STATE="OPEN" - # Make the regular mock return null for the comment lookup that gets the trigger ID - export MOCK_GH_COMMENT_ID_LOOKUP_FAIL=true - - # Run setup-pr-loop.sh with --claude - should fail due to missing trigger_comment_id - local result exit_code - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --claude 2>&1) && exit_code=0 || exit_code=$? 
- - # Clean up mock env vars - unset MOCK_GH_LATEST_COMMIT_AT MOCK_GH_COMMENT_ID_LOOKUP_FAIL - - # Verify it failed - if [[ $exit_code -eq 0 ]]; then - echo "Expected setup to fail but it succeeded" - echo "Output (last 30 lines): $(echo "$result" | tail -30)" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Verify error message about missing trigger comment ID - if ! echo "$result" | grep -q "Could not find trigger comment ID"; then - echo "Expected error message about missing trigger_comment_id" - echo "Got: $result" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Verify loop directory was cleaned up - if ls .humanize/pr-loop/*/state.md 2>/dev/null | head -1 | grep -q .; then - echo "Loop directory was not cleaned up on failure" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Restore fixtures - echo 
'[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - return 0 -} - -# ======================================== -# Test: Goal Tracker Creation/Update Integration Test -# ======================================== - -test_goal_tracker_creation_integration() { - # Test that setup-pr-loop.sh creates goal-tracker.md - # This verifies: goal tracker is created at setup - - # Set up fixtures for Case 1: No comments yet (simplest setup) - echo '[]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[]' > "$FIXTURES_DIR/reactions.json" - - export MOCK_GH_PR_NUMBER=999 - export MOCK_GH_PR_STATE="OPEN" - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T10:00:00Z" - export MOCK_GH_HEAD_SHA="abc123xyz" - - # Clean up any existing pr-loop directories - rm -rf .humanize/pr-loop 2>/dev/null || true - - # Run setup-pr-loop.sh with --codex - local result exit_code - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --codex 2>&1) && exit_code=0 || exit_code=$? 
- - # Clean up mock env vars - unset MOCK_GH_PR_NUMBER MOCK_GH_PR_STATE MOCK_GH_LATEST_COMMIT_AT MOCK_GH_HEAD_SHA - - # Find the created loop directory - local loop_dir - loop_dir=$(ls -d .humanize/pr-loop/*/ 2>/dev/null | head -1) - - if [[ -z "$loop_dir" ]]; then - echo "No loop directory created by setup-pr-loop.sh" - echo "Output: $(echo "$result" | tail -20)" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md was created - if [[ ! -f "${loop_dir}goal-tracker.md" ]]; then - echo "goal-tracker.md not found in $loop_dir" - echo "Files in loop dir: $(ls -la "$loop_dir" 2>/dev/null)" - # Clean up - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md has expected structure (Issue Summary table) - if ! 
grep -q "Issue Summary" "${loop_dir}goal-tracker.md"; then - echo "goal-tracker.md missing 'Issue Summary' section" - echo "Contents: $(cat "${loop_dir}goal-tracker.md")" - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md has PR number from mock - if ! grep -q "999" "${loop_dir}goal-tracker.md"; then - echo "goal-tracker.md missing PR number 999" - echo "Contents: $(cat "${loop_dir}goal-tracker.md")" - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Clean up - rm -rf .humanize/pr-loop - - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo 
'[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - - return 0 -} - -# Test: Stop hook updates goal tracker with round results -test_stophook_updates_goal_tracker() { - # This test verifies that running the stop hook after bot review updates the goal tracker - local test_dir="$TEST_TEMP_DIR/stophook_goal_test" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps - local trigger_ts commit_ts comment_ts - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - comment_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - - # Create state.md for Round 0 - cat > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - # Create initial goal tracker (need blank line after table header for row insertion) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << 'EOF' -# PR Review Goal Tracker (PR #123) - -## Issue Summary - -| Round | Bot | Issues Found | Issues Resolved | Status | -|-------|-----|--------------|-----------------|--------| - -## Total Statistics -- Total Issues Found: 0 -- Total Issues Resolved: 0 -EOF - - # Create round-0 resolve file - echo "# Resolution" > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - # Create mock gh and git - local mock_bin="$test_dir/bin" 
- mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash -COMMENT_TS="$comment_ts" -COMMIT_TS="$commit_ts" - -case "\$1" in - repo) - if [[ "\$*" == *"--json owner"* ]]; then - echo "testowner" - exit 0 - fi - if [[ "\$*" == *"--json name"* ]]; then - echo "testrepo" - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - # Return codex comment with issues - echo "[{\"id\":1001,\"user\":{\"login\":\"chatgpt-codex-connector[bot]\",\"type\":\"Bot\"},\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found 2 issues: fix X, fix Y\"}]" - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - echo '[]' - exit 0 - ;; - pr) - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"headRefOid"* ]]; then - echo "{\"sha\":\"abc123\",\"date\":\"\$COMMIT_TS\"}" - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - # When --jq is used, return just the extracted timestamp - echo "\$COMMIT_TS" - exit 0 - fi - if [[ "\$*" == *"commits"* ]]; then - echo "{\"commits\":[{\"committedDate\":\"\$COMMIT_TS\"}]}" - exit 0 - fi - # PR lookup with number and url: gh pr view --json number,url -q '.number,.url' - if [[ "\$*" == *"number,url"* ]]; then - echo '123' - echo 'https://github.com/testowner/testrepo/pull/123' - exit 0 - fi - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '123' - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - echo "" - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Mock codex command - returns ISSUES_REMAINING to trigger goal tracker update - cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/bin/bash -# Mock codex for testing - output review analysis -cat << 'CODEX_OUTPUT' -## Bot Review Analysis - -### codex (chatgpt-codex-connector[bot]) -**Status**: ISSUES -**Issues Found**: 1 -- Fix issue X - -### Issues Found (if any) -- Fix issue X - -### Approved Bots (to remove from active_bots) -(none) - -### Final Recommendation -ISSUES_REMAINING -CODEX_OUTPUT -exit 0 -MOCK_CODEX - chmod +x "$mock_bin/codex" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_dir" - local old_path="$PATH" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(timeout 15 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" 2>&1) || true - - export PATH="$old_path" - unset CLAUDE_PROJECT_DIR - - # Verify goal tracker was updated with Round 1 row - local goal_file="$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" - if [[ ! -f "$goal_file" ]]; then - echo "Goal tracker file not found" - rm -rf "$test_dir" - return 1 - fi - - # Check that Round 1 row was added (format: | 1 | with possible spaces) - if ! 
grep -qE '^\|[[:space:]]*1[[:space:]]*\|' "$goal_file"; then - echo "Goal tracker not updated with Round 1" - echo "Contents: $(cat "$goal_file")" - echo "Hook output: $(echo "$hook_output" | tail -20)" - rm -rf "$test_dir" - return 1 - fi - - # Check that codex bot is mentioned in the row (lowercase to match configured bot names) - if ! grep -qi "codex" "$goal_file"; then - echo "Goal tracker missing codex bot entry" - echo "Contents: $(cat "$goal_file")" - rm -rf "$test_dir" - return 1 - fi - - rm -rf "$test_dir" - return 0 -} - -# ======================================== -# Main test runner -# ======================================== - -main() { - local test_filter="${1:-}" - - echo "==========================================" - echo " PR Loop System Tests" - echo "==========================================" - echo "" - echo "Project root: $PROJECT_ROOT" - echo "Mock directory: $MOCKS_DIR" - echo "Fixtures directory: $FIXTURES_DIR" - echo "" - - # Run tests - if [[ -z "$test_filter" || "$test_filter" == "mutual_exclusion" ]]; then - run_test "Mutual exclusion - RLCR blocks PR" test_mutual_exclusion_rlcr_blocks_pr - run_test "Mutual exclusion - PR blocks RLCR" test_mutual_exclusion_pr_blocks_rlcr - fi - - if [[ -z "$test_filter" || "$test_filter" == "reviewer_status" ]]; then - run_test "Reviewer status - Case 1 (no comments)" test_reviewer_status_case1_no_comments - run_test "Reviewer status - Case 2 (partial comments)" test_reviewer_status_case2_partial_comments - fi - - if [[ -z "$test_filter" || "$test_filter" == "reactions" ]]; then - run_test "Codex +1 detection" test_codex_thumbsup_detected - run_test "Codex +1 with --after filter" test_codex_thumbsup_with_after_filter - run_test "Claude eyes detection" test_claude_eyes_detected - fi - - if [[ -z "$test_filter" || "$test_filter" == "pr_reviews" ]]; then - run_test "PR reviews detection" test_reviewer_status_includes_pr_reviews - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase" ]]; then - 
run_test "Phase detection - approved" test_phase_detection_approved - run_test "Phase detection - waiting initial" test_phase_detection_waiting_initial - run_test "Phase detection - waiting reviewer" test_phase_detection_waiting_reviewer - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker" ]]; then - run_test "Goal tracker parsing" test_goal_tracker_parsing - fi - - if [[ -z "$test_filter" || "$test_filter" == "pr_goal_tracker" ]]; then - run_test "PR goal tracker parsing" test_pr_goal_tracker_parsing - run_test "update_pr_goal_tracker helper" test_update_pr_goal_tracker - fi - - if [[ -z "$test_filter" || "$test_filter" == "state_file" ]]; then - run_test "State file detection - active" test_state_file_detection_active - run_test "State file detection - approve" test_state_file_detection_approve - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase_extended" ]]; then - run_test "Phase detection - cancelled" test_phase_detection_cancelled - run_test "Phase detection - maxiter" test_phase_detection_maxiter - fi - - if [[ -z "$test_filter" || "$test_filter" == "reviewer_status_extended" ]]; then - run_test "Reviewer status - Case 3 (all commented)" test_reviewer_status_case3_all_commented - fi - - if [[ -z "$test_filter" || "$test_filter" == "unpushed" ]]; then - run_test "Unpushed commits detection" test_unpushed_commits_detected - fi - - if [[ -z "$test_filter" || "$test_filter" == "force_push" ]]; then - run_test "Force push ancestry check" test_force_push_ancestry_check - fi - - if [[ -z "$test_filter" || "$test_filter" == "approve_state" ]]; then - run_test "Approve state detection" test_approve_state_detection - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_schema" ]]; then - run_test "Goal tracker schema" test_goal_tracker_schema - run_test "Goal tracker update adds row" test_goal_tracker_update_adds_row - run_test "Goal tracker update idempotent" test_goal_tracker_update_idempotent - fi - - if [[ -z "$test_filter" || 
"$test_filter" == "startup_case" ]]; then - run_test "Startup case 4/5 detection" test_startup_case_4_5_detection - fi - - if [[ -z "$test_filter" || "$test_filter" == "shared_monitor" ]]; then - run_test "Shared monitor - find latest session" test_shared_monitor_find_latest_session - run_test "Shared monitor - find state file" test_shared_monitor_find_state_file - run_test "Shared monitor - get file size" test_shared_monitor_get_file_size - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase_analyzing" ]]; then - run_test "Phase detection - codex analyzing (file growth)" test_phase_detection_codex_analyzing - fi - - # Monitor output assertions for phase labels - if [[ -z "$test_filter" || "$test_filter" == "monitor_output" ]]; then - run_test "Monitor output - approved phase display" test_monitor_output_phase_approved - run_test "Monitor output - waiting initial phase display" test_monitor_output_phase_waiting_initial - run_test "Monitor output - cancelled phase display" test_monitor_output_phase_cancelled - run_test "Monitor output - codex analyzing phase display" test_monitor_output_phase_codex_analyzing - fi - - if [[ -z "$test_filter" || "$test_filter" == "case1_exception" ]]; then - run_test "Case 1 exception - no trigger required for startup_case 1" test_case1_exception_no_trigger - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_table" ]]; then - run_test "Goal tracker row inserted inside table" test_goal_tracker_row_inside_table - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_partial" ]]; then - run_test "Goal tracker partial update repair" test_goal_tracker_partial_update_repair - fi - - if [[ -z "$test_filter" || "$test_filter" == "case_4_5" ]]; then - run_test "Case 4 emission (all commented + new commits)" test_case4_all_commented_new_commits - run_test "Case 5 emission (partial + new commits)" test_case5_partial_commented_new_commits - fi - - if [[ -z "$test_filter" || "$test_filter" == "setup_failure" ]]; 
then - run_test "Setup Case 4/5 failure path (missing trigger_comment_id)" test_setup_case45_missing_trigger_comment_id - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_integration" ]]; then - run_test "Goal tracker creation via setup-pr-loop.sh" test_goal_tracker_creation_integration - run_test "Stop hook updates goal tracker with round results" test_stophook_updates_goal_tracker - fi - - echo "" - echo "==========================================" - echo " Results" - echo "==========================================" - echo "" - echo "Tests run: $TESTS_RUN" - echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}" - echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}" - echo "" - - if [[ $TESTS_FAILED -gt 0 ]]; then - exit 1 - fi -} - -main "$@" diff --git a/tests/test-pr-loop.sh b/tests/test-pr-loop.sh deleted file mode 100755 index 54af3829..00000000 --- a/tests/test-pr-loop.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# -# Tests for PR loop feature -# -# This is the main test runner that sources and executes all test modules: -# - test-pr-loop-scripts.sh: Script argument validation tests -# - test-pr-loop-hooks.sh: Hook functionality tests -# - test-pr-loop-stophook.sh: Stop hook tests -# -# Usage: ./test-pr-loop.sh -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -# Source test helpers and common library -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -# ======================================== -# Test Environment Setup -# ======================================== - -init_pr_loop_test_env - -# ======================================== -# Source Test Modules -# ======================================== - -source "$SCRIPT_DIR/test-pr-loop-scripts.sh" -source "$SCRIPT_DIR/test-pr-loop-hooks.sh" -source "$SCRIPT_DIR/test-pr-loop-stophook.sh" - -# ======================================== -# Run All Tests -# ======================================== - -# Script tests (setup, cancel, fetch, poll) -run_script_tests - -# Hook functionality tests -run_hook_tests - -# Stop hook tests -run_stophook_tests - -# ======================================== -# Print Summary -# ======================================== - -print_test_summary diff --git a/tests/test-refine-plan.sh b/tests/test-refine-plan.sh index 36e56e6a..c43ba60f 100755 --- a/tests/test-refine-plan.sh +++ b/tests/test-refine-plan.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for refine-plan command structure, validator behavior, QA template coverage, # and AC-7 installation wiring coverage @@ -706,7 +706,7 @@ assert_file_contains "$REFINE_PLAN_CMD" 'Keep `--alt-language` out of the valida assert_file_contains "$REFINE_PLAN_CMD" "- Exit code 0: Continue to Phase 2" "refine-plan.md documents validator exit code 0" assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 1: Report `Input file not found` and stop' "refine-plan.md documents validator exit code 1" assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 2: Report `Input file is empty` and stop' "refine-plan.md documents validator exit code 2" -assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 3: Report `Input file has no CMT:/ENDCMT blocks` and stop' "refine-plan.md documents validator exit code 3" +assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 3: 
Report `Input file has no comment blocks` and stop' "refine-plan.md documents validator exit code 3" assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 4: Report `Input file is missing required gen-plan sections` and stop' "refine-plan.md documents validator exit code 4" assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 5: Report `Output directory does not exist or is not writable - please fix it` and stop' "refine-plan.md documents validator exit code 5" assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 6: Report `QA directory is not writable` and stop' "refine-plan.md documents validator exit code 6" @@ -714,18 +714,18 @@ assert_file_contains "$REFINE_PLAN_CMD" '- Exit code 7: Report `Invalid argument echo "" echo "PT-5: Comment extraction requirements" -assert_file_contains "$REFINE_PLAN_CMD" "Support both inline and multi-line blocks:" "refine-plan.md supports inline and multiline comment extraction" +assert_file_contains "$REFINE_PLAN_CMD" "Support both inline and multi-line blocks for all formats:" "refine-plan.md supports inline and multiline comment extraction" assert_file_contains "$REFINE_PLAN_CMD" 'Inline: `Text before CMT: comment text ENDCMT text after`' "refine-plan.md documents single-line comment extraction" assert_file_contains "$REFINE_PLAN_CMD" "CMT:" "refine-plan.md includes multiline comment marker example" -assert_file_contains "$REFINE_PLAN_CMD" 'Ignore `CMT:` and `ENDCMT` sequences inside fenced code blocks.' "refine-plan.md documents code fence exclusion" -assert_file_contains "$REFINE_PLAN_CMD" 'Ignore `CMT:` and `ENDCMT` sequences inside HTML comments.' "refine-plan.md documents HTML comment exclusion" +assert_file_contains "$REFINE_PLAN_CMD" 'Ignore comment markers inside fenced code blocks.' "refine-plan.md documents code fence exclusion" +assert_file_contains "$REFINE_PLAN_CMD" 'Ignore comment markers inside HTML comments.' 
"refine-plan.md documents HTML comment exclusion" assert_file_contains "$REFINE_PLAN_CMD" "Preserve surrounding non-comment text when removing inline comment blocks from the working plan text." "refine-plan.md preserves inline surrounding text" assert_file_contains "$REFINE_PLAN_CMD" '- `nearest_heading` or `Preamble` when no heading exists yet' "refine-plan.md records nearest heading or Preamble" assert_file_contains "$REFINE_PLAN_CMD" '- `location_label` for QA output' "refine-plan.md records location labels" assert_file_contains "$REFINE_PLAN_CMD" '- `form` = `inline` or `multiline`' "refine-plan.md records comment form" assert_file_contains "$REFINE_PLAN_CMD" '- `context_excerpt` from the nearest non-comment source text' "refine-plan.md records context excerpts" -assert_file_contains "$REFINE_PLAN_CMD" 'Nested `CMT:` while already inside a comment block' "refine-plan.md documents nested CMT parse errors" -assert_file_contains "$REFINE_PLAN_CMD" '`ENDCMT` encountered while not inside a comment block' "refine-plan.md documents stray ENDCMT parse errors" +assert_file_contains "$REFINE_PLAN_CMD" 'Nested comment start marker while already inside a comment block' "refine-plan.md documents nested CMT parse errors" +assert_file_contains "$REFINE_PLAN_CMD" 'Comment end marker encountered while not inside a comment block or wrong end marker for the format' "refine-plan.md documents stray ENDCMT parse errors" assert_file_contains "$REFINE_PLAN_CMD" "End of file reached while still inside a comment block" "refine-plan.md documents missing ENDCMT parse errors" assert_file_contains "$REFINE_PLAN_CMD" "No non-empty CMT blocks remain after parsing" "refine-plan.md rejects empty-only comment sets" @@ -1161,10 +1161,10 @@ else fail "validate-refine-plan-io: unterminated CMT blocks exit 3" "3" "$VALIDATOR_EXIT_CODE" fi -if echo "$VALIDATOR_OUTPUT" | grep -q "missing ENDCMT"; then +if echo "$VALIDATOR_OUTPUT" | grep -q "missing end marker"; then pass "validate-refine-plan-io: 
unterminated CMT blocks report missing ENDCMT" else - fail "validate-refine-plan-io: unterminated CMT blocks report missing ENDCMT" "missing ENDCMT" "$VALIDATOR_OUTPUT" + fail "validate-refine-plan-io: unterminated CMT blocks report missing ENDCMT" "missing end marker" "$VALIDATOR_OUTPUT" fi if echo "$VALIDATOR_OUTPUT" | grep -q 'context: "CMT: this block never closes"'; then @@ -1182,10 +1182,10 @@ else fail "validate-refine-plan-io: nested CMT blocks exit 3" "3" "$VALIDATOR_EXIT_CODE" fi -if echo "$VALIDATOR_OUTPUT" | grep -q "nested CMT block"; then +if echo "$VALIDATOR_OUTPUT" | grep -q "nested comment block"; then pass "validate-refine-plan-io: nested CMT blocks report a parse error" else - fail "validate-refine-plan-io: nested CMT blocks report a parse error" "nested CMT block" "$VALIDATOR_OUTPUT" + fail "validate-refine-plan-io: nested CMT blocks report a parse error" "nested comment block" "$VALIDATOR_OUTPUT" fi MISSING_SECTION_PLAN="$TEST_FIXTURES_DIR/missing-sections-plan.md" @@ -1314,10 +1314,10 @@ else fail "validate-refine-plan-io: mixed valid, ignored, and empty markers still pass with a valid block" "0" "$VALIDATOR_EXIT_CODE" fi -if echo "$VALIDATOR_OUTPUT" | grep -Eq 'Input file: .+ \([0-9]+ lines, 1 CMT blocks\)'; then +if echo "$VALIDATOR_OUTPUT" | grep -Eq 'Input file: .+ \([0-9]+ lines, 1 comment blocks\)'; then pass "validate-refine-plan-io: success output reports only valid non-empty CMT blocks" else - fail "validate-refine-plan-io: success output reports only valid non-empty CMT blocks" "1 CMT blocks" "$VALIDATOR_OUTPUT" + fail "validate-refine-plan-io: success output reports only valid non-empty CMT blocks" "1 comment blocks" "$VALIDATOR_OUTPUT" fi NEW_FILE_DIR="$TEST_FIXTURES_DIR/new-file-output" diff --git a/tests/test-session-id.sh b/tests/test-session-id.sh index 31fffc58..33de5ba1 100755 --- a/tests/test-session-id.sh +++ b/tests/test-session-id.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for session_id feature in 
RLCR loop # @@ -120,7 +120,7 @@ cat > "$TEST_DIR/project/.humanize/rlcr/2026-01-01_00-00-00/state.md" << 'EOF' --- current_round: 0 max_iterations: 10 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false diff --git a/tests/test-skill-monitor.sh b/tests/test-skill-monitor.sh index 54f229ae..4dcf708b 100755 --- a/tests/test-skill-monitor.sh +++ b/tests/test-skill-monitor.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for _humanize_monitor_skill (humanize monitor skill) # @@ -58,7 +58,7 @@ setup_test_env() { create_skill_invocation() { local unique_id="$1" local status="$2" - local model="${3:-gpt-5.4}" + local model="${3:-gpt-5.5}" local effort="${4:-high}" local duration="${5:-15s}" local question="${6:-How should I structure this?}" @@ -140,7 +140,7 @@ echo "=== Skill Monitor: Single Invocation ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-02-35-12345-abc123" "success" "gpt-5.4" "high" "15s" "How should I structure the auth module?" +create_skill_invocation "2026-02-19_21-02-35-12345-abc123" "success" "gpt-5.5" "high" "15s" "How should I structure the auth module?" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? 
if [[ $rc -eq 0 ]]; then @@ -167,7 +167,7 @@ else fail "Should show success status" "got: $output" fi -if grep -q "gpt-5.4" <<< "$output"; then +if grep -q "gpt-5.5" <<< "$output"; then pass "Shows model name" else fail "Should show model" "got: $output" @@ -199,10 +199,10 @@ echo "=== Skill Monitor: Multiple Invocations ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "First question" -create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.4" "high" "5s" "Second question" -create_skill_invocation "2026-02-19_21-00-00-333-ccc" "timeout" "gpt-5.4" "high" "3600s" "Third question" -create_skill_invocation "2026-02-19_21-30-00-444-ddd" "success" "gpt-5.4" "high" "20s" "Latest question" +create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "First question" +create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.5" "high" "5s" "Second question" +create_skill_invocation "2026-02-19_21-00-00-333-ccc" "timeout" "gpt-5.5" "high" "3600s" "Third question" +create_skill_invocation "2026-02-19_21-30-00-444-ddd" "success" "gpt-5.5" "high" "20s" "Latest question" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? if grep -q "Total Invocations: 4" <<< "$output"; then @@ -250,8 +250,8 @@ echo "=== Skill Monitor: Running Invocation ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "Completed question" -create_skill_invocation "2026-02-19_21-30-00-222-bbb" "running" "gpt-5.4" "high" "" "Running question" +create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "Completed question" +create_skill_invocation "2026-02-19_21-30-00-222-bbb" "running" "gpt-5.5" "high" "" "Running question" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? 
if grep -q "Running: 1" <<< "$output"; then @@ -274,9 +274,9 @@ echo "=== Skill Monitor: Recent Invocations List ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "Question one" -create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.4" "high" "5s" "Question two" -create_skill_invocation "2026-02-19_21-00-00-333-ccc" "success" "gpt-5.4" "high" "20s" "Question three" +create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "Question one" +create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.5" "high" "5s" "Question two" +create_skill_invocation "2026-02-19_21-00-00-333-ccc" "success" "gpt-5.5" "high" "20s" "Question three" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? if grep -q "Recent Invocations" <<< "$output"; then @@ -314,13 +314,13 @@ Additional context about the question. ## Configuration -- Model: gpt-5.4 +- Model: gpt-5.5 - Effort: high - Timeout: 3600s EOF cat > "$local_dir/metadata.md" << 'EOF' --- -model: gpt-5.4 +model: gpt-5.5 effort: high timeout: 3600 exit_code: 0 @@ -353,7 +353,7 @@ echo "=== Skill Monitor: Empty Response ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-00-00-111-aaa" "empty_response" "gpt-5.4" "high" "30s" "Why is the sky blue?" +create_skill_invocation "2026-02-19_21-00-00-111-aaa" "empty_response" "gpt-5.5" "high" "30s" "Why is the sky blue?" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? 
if grep -q "Empty: 1" <<< "$output"; then @@ -376,7 +376,7 @@ echo "=== Skill Monitor: Non-skill Dir Filtering ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "Real question" +create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "Real question" # Create a non-matching directory mkdir -p ".humanize/skill/not-a-skill-dir" echo "junk" > ".humanize/skill/not-a-skill-dir/input.md" diff --git a/tests/test-state-exit-naming.sh b/tests/test-state-exit-naming.sh index d48c8a95..7982baac 100755 --- a/tests/test-state-exit-naming.sh +++ b/tests/test-state-exit-naming.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for state.md rename on exit # diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index f008aa42..8c3b6e25 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for rlcr-stop-gate wrapper project root detection # @@ -40,7 +40,7 @@ PLANEOF --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -115,30 +115,175 @@ else fail "rlcr-stop-gate --project-root output contains expected block reason" "output containing BLOCK:" "$OUTPUT2" fi -# Test 3: No active loop -> gate allows exit (exit 0) +# Test 3: Tracked Humanize state blocks before normal loop validation T3_DIR="$TEST_DIR/t3" -mkdir -p "$T3_DIR/empty-project" +mkdir -p "$T3_DIR" +setup_active_loop_fixture "$T3_DIR/project" +echo "tracked" > "$T3_DIR/project/.humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md" +git -C "$T3_DIR/project" add -f .humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md set +e ( - cd "$T3_DIR/empty-project" + cd "$T3_DIR/project" "$GATE_SCRIPT" ) > "$T3_DIR/out.txt" 2>&1 EXIT3=$? 
set -e -if [[ "$EXIT3" -eq 0 ]]; then - pass "rlcr-stop-gate exits 0 when no active loop exists" +if [[ "$EXIT3" -eq 10 ]]; then + pass "rlcr-stop-gate blocks tracked Humanize state" else OUTPUT3=$(cat "$T3_DIR/out.txt" 2>/dev/null || true) - fail "rlcr-stop-gate exits 0 when no active loop exists" "exit 0" "exit $EXIT3; output: $OUTPUT3" + fail "rlcr-stop-gate blocks tracked Humanize state" "exit 10" "exit $EXIT3; output: $OUTPUT3" fi -if grep -q "^ALLOW:" "$T3_DIR/out.txt" 2>/dev/null; then - pass "rlcr-stop-gate reports ALLOW when no active loop" +if grep -q "Tracked Humanize State Blocked" "$T3_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate reports tracked Humanize state with dedicated reason" else OUTPUT3=$(cat "$T3_DIR/out.txt" 2>/dev/null || true) - fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT3" + fail "rlcr-stop-gate reports tracked Humanize state with dedicated reason" "output containing Tracked Humanize State Blocked" "$OUTPUT3" +fi + +# Test 4: Unrelated dot-prefixed files that happen to start with .humanize- +# must not be treated as loop state. .humanize-backup and .humanizeconfig are +# explicitly allowed by the git add validator (tests/test-humanize-escape.sh); +# the tracked-state guard must stay consistent and ignore them. +T4_DIR="$TEST_DIR/t4" +mkdir -p "$T4_DIR" +setup_active_loop_fixture "$T4_DIR/project" +echo "not loop state" > "$T4_DIR/project/.humanize-backup" +echo "not loop state" > "$T4_DIR/project/.humanizeconfig" +git -C "$T4_DIR/project" add -f .humanize-backup .humanizeconfig + +set +e +( + cd "$T4_DIR/project" + "$GATE_SCRIPT" +) > "$T4_DIR/out.txt" 2>&1 +EXIT4=$? +set -e + +if [[ "$EXIT4" -eq 10 ]]; then + pass "rlcr-stop-gate does not confuse .humanize-backup with loop state" +else + OUTPUT4=$(cat "$T4_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate does not confuse .humanize-backup with loop state" "exit 10" "exit $EXIT4; output: $OUTPUT4" +fi + +if ! 
grep -q "Tracked Humanize State Blocked" "$T4_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate does not emit tracked-state reason for .humanize-backup" +else + OUTPUT4=$(cat "$T4_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate does not emit tracked-state reason for .humanize-backup" "no Tracked Humanize State Blocked line" "$OUTPUT4" +fi + +# Test 5: No active loop -> gate allows exit (exit 0) +T5_DIR="$TEST_DIR/t5" +mkdir -p "$T5_DIR/empty-project" + +set +e +( + cd "$T5_DIR/empty-project" + "$GATE_SCRIPT" +) > "$T5_DIR/out.txt" 2>&1 +EXIT5=$? +set -e + +if [[ "$EXIT5" -eq 0 ]]; then + pass "rlcr-stop-gate exits 0 when no active loop exists" +else + OUTPUT5=$(cat "$T5_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate exits 0 when no active loop exists" "exit 0" "exit $EXIT5; output: $OUTPUT5" +fi + +if grep -q "^ALLOW:" "$T5_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate reports ALLOW when no active loop" +else + OUTPUT5=$(cat "$T5_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT5" +fi + +# Test 6: Empty session_id must NOT drop transcript_path from the hook +# input JSON (regression: a `select(length > 0)` used as a plain object +# value would collapse the whole enclosing object to empty whenever any +# selected field was empty, wiping forwarded fields like transcript_path +# even though only session_id was missing). The fix replaces the plain +# select with explicit if/then/else so each field independently becomes +# null on empty input. +T6_DIR="$TEST_DIR/t6" +mkdir -p "$T6_DIR/bin" + +# Mock hook that echoes the raw stdin it received, so we can inspect the +# JSON rlcr-stop-gate.sh builds without depending on the real hook's +# pending-bg logic. +cat > "$T6_DIR/bin/loop-codex-stop-hook.sh" <<'MOCK_HOOK_EOF' +#!/usr/bin/env bash +set -euo pipefail +INPUT="$(cat)" +# Emit a JSON block so the gate wrapper walks the non-"allow on empty" +# branch. 
We set decision:"block" AND include a recognizable reason the +# test can grep for. +printf '%s\n' "$INPUT" > "${MOCK_HOOK_INPUT_LOG:-/dev/null}" +printf '%s\n' '{"decision":"block","reason":"mock-hook","systemMessage":"mock"}' +MOCK_HOOK_EOF +chmod +x "$T6_DIR/bin/loop-codex-stop-hook.sh" + +# Layout expected by rlcr-stop-gate.sh: HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh. +# We stage a fake plugin root pointing at the mock hook and copy the gate +# wrapper next to it so the relative resolution resolves to the mock. +mkdir -p "$T6_DIR/plugin/scripts" "$T6_DIR/plugin/hooks/lib" +cp "$T6_DIR/bin/loop-codex-stop-hook.sh" "$T6_DIR/plugin/hooks/loop-codex-stop-hook.sh" +cp "$GATE_SCRIPT" "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" +# rlcr-stop-gate sources hooks/lib/project-root.sh for PROJECT_ROOT resolution. +REAL_PROJECT_ROOT_LIB="$(dirname "$GATE_SCRIPT")/../hooks/lib/project-root.sh" +cp "$REAL_PROJECT_ROOT_LIB" "$T6_DIR/plugin/hooks/lib/project-root.sh" +chmod +x "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" + +T6_INPUT_LOG="$T6_DIR/hook-input.json" +T6_TRANSCRIPT="$T6_DIR/fake-transcript.jsonl" +: > "$T6_TRANSCRIPT" + +set +e +( + cd "$T6_DIR" + # Pin CLAUDE_PROJECT_DIR so rlcr-stop-gate resolves a root even though + # the fixture is not a git repo. This test exercises the JSON-object- + # collapse regression for empty session_id; project-root resolution is + # orthogonal and must not short-circuit the gate with an ALLOW. + CLAUDE_PROJECT_DIR="$T6_DIR" \ + MOCK_HOOK_INPUT_LOG="$T6_INPUT_LOG" \ + "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" \ + --transcript-path "$T6_TRANSCRIPT" \ + --json +) > "$T6_DIR/out.txt" 2>&1 +EXIT6=$? +set -e + +if [[ ! 
-f "$T6_INPUT_LOG" ]]; then + fail "rlcr-stop-gate forwards transcript_path when session_id is empty" \ + "mock hook to capture hook input JSON" \ + "captured input log missing; gate output: $(cat "$T6_DIR/out.txt" 2>/dev/null || true)" +else + T6_TRANSCRIPT_SEEN=$(jq -r '.transcript_path // "__MISSING__"' "$T6_INPUT_LOG" 2>/dev/null || echo "__PARSE_ERROR__") + T6_SESSION_SEEN=$(jq -r '.session_id | if . == null then "__NULL__" else . end' "$T6_INPUT_LOG" 2>/dev/null || echo "__PARSE_ERROR__") + if [[ "$T6_TRANSCRIPT_SEEN" == "$T6_TRANSCRIPT" ]] && [[ "$T6_SESSION_SEEN" == "__NULL__" ]]; then + pass "rlcr-stop-gate forwards transcript_path when session_id is empty (jq object-collapse fix)" + else + fail "rlcr-stop-gate forwards transcript_path when session_id is empty (jq object-collapse fix)" \ + "transcript_path=$T6_TRANSCRIPT, session_id=__NULL__" \ + "transcript_path=$T6_TRANSCRIPT_SEEN, session_id=$T6_SESSION_SEEN; raw: $(cat "$T6_INPUT_LOG" 2>/dev/null || true)" + fi +fi + +# Exit 10 because the mock hook always returns decision:"block"; ensure +# the wrapper reached the decision branch rather than exiting 20 +# (wrapper error) or 0 (bogus ALLOW from lost transcript_path). +if [[ "$EXIT6" -eq 10 ]]; then + pass "rlcr-stop-gate reaches decision branch with empty session_id + real transcript_path" +else + T6_BODY=$(cat "$T6_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate reaches decision branch with empty session_id + real transcript_path" \ + "exit 10 (mock hook returns block)" "exit $EXIT6; output: $T6_BODY" fi print_test_summary "RLCR Stop Gate Wrapper Test Summary" diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh new file mode 100755 index 00000000..9fdfc0f7 --- /dev/null +++ b/tests/test-stop-hook-bg-allow.sh @@ -0,0 +1,1462 @@ +#!/usr/bin/env bash +# +# Tests for the background-task short-circuit in loop-codex-stop-hook.sh. 
+# +# When the current Claude Code session has dispatched background work that has +# not yet completed (via Agent run_in_background=true or Bash +# run_in_background=true), the RLCR stop hook must exit 0 with a user-facing +# systemMessage instead of running any gate or Codex review. The on-disk loop +# state must remain unchanged, so that the next natural stop (after the +# background task finishes) re-enters the normal review flow. +# +# Acceptance criteria exercised here (see +# .humanize/rlcr/2026-04-16_13-19-26/goal-tracker.md for authoritative list): +# AC-1 no bg dispatches -> normal Codex flow +# AC-2 pending subagent -> exit 0 + systemMessage +# AC-3 pending shell -> exit 0 + systemMessage +# AC-4 subagent launch + complete -> normal Codex flow +# AC-5 2 subagents + 1 shell -> systemMessage mentions "3 background" +# AC-6 missing transcript path -> normal Codex flow (fail-closed) +# AC-7 no active loop -> exit 0, no systemMessage, no Codex +# AC-8 finalize phase pending bg -> exit 0 + systemMessage +# AC-9 via rlcr-stop-gate.sh -> exit 0 (wrapper ALLOW) +# AC-10 tilde transcript path -> short-circuit fires +# AC-11 cross-session bg-pending.marker -> "parked" systemMessage, artifacts intact +# AC-12 find_active_loop prefers exact session -> returns older exact-match dir +# AC-13 same-session resume -> stale marker removed +# AC-14 cross-session stop with marker -> marker and stored session_id preserved +# AC-15 task_notification completion format -> marks launch completed +# AC-16 mixed legacy + SDK completions -> resolves to empty pending set +# AC-17 unreadable transcript with marker -> marker and session_id preserved +# AC-18 find_active_loop default ignores marker -> validators stay isolated +# AC-19 hook input omits session_id -> cross-session guard fires +# AC-20 malformed transcript with marker -> marker preserved (fail-closed) +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
# ----------------------------------------------------------------------
# Install two stand-in lsof executables under $TEST_DIR/bin for the
# liveness-probe tests:
#   lsof-alive  always exits 0 (at least one holder: task still running)
#   lsof-dead   always exits 1 (no holders: task is orphaned/dead)
# ----------------------------------------------------------------------
setup_mock_lsof() {
    local bin_dir="$TEST_DIR/bin"
    local spec stub_name stub_status

    mkdir -p "$bin_dir"

    # Each spec is "<file name>:<exit status the stub returns>".
    for spec in "lsof-alive:0" "lsof-dead:1"; do
        stub_name="${spec%%:*}"
        stub_status="${spec##*:}"
        printf '%s\n' '#!/usr/bin/env bash' "exit $stub_status" > "$bin_dir/$stub_name"
        chmod +x "$bin_dir/$stub_name"
    done
}
# ----------------------------------------------------------------------
# Mock codex CLI: records an invocation marker and prints canned feedback.
#
# Writes an executable named `codex` into $TEST_DIR/bin and prepends that
# directory to PATH (exported), so it shadows any real codex for the rest
# of the script. When run, the mock:
#   - creates/truncates the file named by $MOCK_CODEX_MARKER, if set;
#   - prints $MOCK_CODEX_OUTPUT (default "Mock review feedback");
#   - exits 0.
# ----------------------------------------------------------------------
setup_mock_codex() {
    mkdir -p "$TEST_DIR/bin"
    cat > "$TEST_DIR/bin/codex" << 'EOF'
#!/usr/bin/env bash
if [[ -n "${MOCK_CODEX_MARKER:-}" ]]; then
    : > "$MOCK_CODEX_MARKER"
fi
printf '%s\n' "${MOCK_CODEX_OUTPUT:-Mock review feedback}"
exit 0
EOF
    chmod +x "$TEST_DIR/bin/codex"
    export PATH="$TEST_DIR/bin:$PATH"
}

# ----------------------------------------------------------------------
# Build a minimal "active loop" project that satisfies every gate the
# stop hook enforces BEFORE it calls Codex (so tests that want to reach
# the Codex review flow can pass cleanly when bg-pending is not expected).
#
# Arguments:
#   $1  repo_dir        directory to initialise as the test git repo
#   $2  finalize_phase  "true" writes finalize-state.md and
#                       finalize-summary.md instead of state.md and
#                       round-0-summary.md (default: "false")
# Output:
#   Prints the loop directory ($1/.humanize/rlcr/2026-03-01_00-00-00) on
#   stdout. Callers capture it with $(...), so nothing else in this
#   function may write to stdout.
#   NOTE(review): that relies on init_test_git_repo (defined outside this
#   file, presumably in test-helpers.sh) being silent on stdout -- confirm.
# ----------------------------------------------------------------------
create_full_fixture() {
    local repo_dir="$1"
    local finalize_phase="${2:-false}"

    init_test_git_repo "$repo_dir"

    # Ignore plans/ and commit the ignore rule, so the untracked plan file
    # created below does not show up as a working-tree change.
    printf 'plans/\n' > "$repo_dir/.gitignore"
    git -C "$repo_dir" add .gitignore
    git -C "$repo_dir" commit -q -m "Add test gitignore"

    mkdir -p "$repo_dir/plans"
    cat > "$repo_dir/plans/test-plan.md" << 'EOF'
# Test Plan

Exercise the background-task short-circuit.
EOF

    # Branch and commit are read back from the fresh repo so the state
    # file below describes the repo as it actually is.
    local branch base_commit loop_dir
    branch=$(git -C "$repo_dir" rev-parse --abbrev-ref HEAD)
    base_commit=$(git -C "$repo_dir" rev-parse HEAD)
    loop_dir="$repo_dir/.humanize/rlcr/2026-03-01_00-00-00"
    mkdir -p "$loop_dir"

    # The loop directory carries its own copy of the plan.
    cp "$repo_dir/plans/test-plan.md" "$loop_dir/plan.md"

    # The state file name encodes the phase.
    local state_name="state.md"
    if [[ "$finalize_phase" == "true" ]]; then
        state_name="finalize-state.md"
    fi

    # Unquoted EOF: $branch and $base_commit are expanded into the file.
    cat > "$loop_dir/$state_name" << EOF
---
current_round: 0
max_iterations: 42
codex_model: gpt-5.5
codex_effort: high
codex_timeout: 60
push_every_round: false
full_review_round: 5
plan_file: "plans/test-plan.md"
plan_tracked: false
start_branch: $branch
base_branch: $branch
base_commit: $base_commit
review_started: false
ask_codex_question: false
agent_teams: false
---
EOF

    # The summary file name follows the same phase split as the state file.
    local summary_name="round-0-summary.md"
    if [[ "$finalize_phase" == "true" ]]; then
        summary_name="finalize-summary.md"
    fi
    cat > "$loop_dir/$summary_name" << 'EOF'
# Summary

Exercised the background-task short-circuit.
EOF

    cat > "$loop_dir/goal-tracker.md" << 'EOF'
# Goal Tracker
## IMMUTABLE SECTION
### Ultimate Goal
Exercise background-task short-circuit.
### Acceptance Criteria
- AC-1: Hook reaches Codex review when no bg tasks are pending.
## MUTABLE SECTION
### Plan Version: 1 (Updated: Round 0)
#### Active Tasks
| Task | Target AC | Status | Notes |
|------|-----------|--------|-------|
| Exercise stop hook | AC-1 | completed | - |
EOF

    # Echo the loop dir so callers can reach state artifacts.
    echo "$loop_dir"
}

# A project with no RLCR state file at all: only the git repo is created,
# no .humanize/ directory.
#   $1  repo_dir  directory to initialise as the test git repo
create_empty_project() {
    local repo_dir="$1"
    init_test_git_repo "$repo_dir"
}
# ----------------------------------------------------------------------
# Transcript fixture builders.
# Each emit_* helper prints one compact JSON record (a single JSONL line)
# to stdout; write_transcript assembles records into a transcript file.
# ----------------------------------------------------------------------

# Assistant record containing one tool_use call with run_in_background=true.
#   $1  tool_use id
#   $2  tool name (callers use "Agent" or "Bash")
#   $3  raw JSON fragment appended inside the input object; it must start
#       with a comma (e.g. ',"command":"sleep 30"') or be empty
emit_tool_use_assistant() {
    local tool_use_id="$1" tool_name="$2" extra_input_json="$3"
    local input_json="{\"run_in_background\":true${extra_input_json}}"
    jq -c -n \
        --arg id "$tool_use_id" \
        --arg name "$tool_name" \
        --argjson input "$input_json" \
        '{
            type:"assistant",
            message:{
                role:"assistant",
                content:[
                    {type:"tool_use", id:$id, name:$name, input:$input}
                ]
            }
        }'
}

# User record carrying the tool_result of an async agent launch.
#   $1  tool_use id the result answers
#   $2  agent id reported in toolUseResult.agentId
emit_async_agent_launch_result() {
    local tool_use_id="$1" agent_id="$2"
    jq -c -n \
        --arg id "$tool_use_id" \
        --arg aid "$agent_id" \
        '{
            type:"user",
            message:{
                role:"user",
                content:[{tool_use_id:$id, type:"tool_result",
                          content:[{type:"text", text:"Async agent launched"}]}]
            },
            toolUseResult:{isAsync:true, status:"async_launched", agentId:$aid}
        }'
}

# User record carrying the tool_result of a background shell launch.
#   $1  tool_use id the result answers
#   $2  background task id reported in toolUseResult.backgroundTaskId
emit_bg_shell_launch_result() {
    local tool_use_id="$1" bg_task_id="$2"
    jq -c -n \
        --arg id "$tool_use_id" \
        --arg bid "$bg_task_id" \
        '{
            type:"user",
            message:{
                role:"user",
                content:[{tool_use_id:$id, type:"tool_result",
                          content:[{type:"text", text:"Shell started in background"}]}]
            },
            toolUseResult:{backgroundTaskId:$bid}
        }'
}

# queue-operation record whose content is a <task-notification> XML blob.
#   $1  task id
#   $2  tool_use id
#   $3  status (default: completed)
emit_task_completion_event() {
    local task_id="$1" tool_use_id="$2" status="${3:-completed}"
    local notif
    notif=$(printf '<task-notification>\n<task-id>%s</task-id>\n<tool-use-id>%s</tool-use-id>\n<status>%s</status>\n</task-notification>' \
        "$task_id" "$tool_use_id" "$status")
    jq -c -n --arg content "$notif" \
        '{type:"queue-operation", operation:"enqueue", content:$content}'
}

# system/task_notification record (the structured completion format).
#   $1  task id
#   $2  tool_use id
#   $3  status (default: completed)
emit_sdk_task_notification() {
    local task_id="$1" tool_use_id="$2" status="${3:-completed}"
    jq -c -n --arg tid "$task_id" --arg tu "$tool_use_id" --arg st "$status" \
        '{type:"system", subtype:"task_notification", task_id:$tid, tool_use_id:$tu, status:$st}'
}

# Write a transcript file: truncate $1, then append every remaining
# argument as its own line. With no records the file is left empty.
write_transcript() {
    local path="$1"
    shift
    # Keep the loop variable local so a caller's own `line` is not clobbered.
    local line
    : > "$path"
    for line in "$@"; do
        printf '%s\n' "$line" >> "$path"
    done
}

# Print a content digest for file $1.
# Prints nothing when the file does not exist, and the literal word
# "unreadable" when it exists but no digest could be produced. Always
# returns 0, so it is safe inside $(...) under `set -euo pipefail`.
# sha256sum is preferred; shasum and POSIX cksum are fallbacks for
# platforms that do not ship sha256sum.
_bg_allow_file_digest() {
    local file="$1" digest=""
    [[ -f "$file" ]] || return 0
    if command -v sha256sum >/dev/null 2>&1; then
        digest=$(sha256sum "$file" 2>/dev/null | awk '{print $1}') || digest=""
    elif command -v shasum >/dev/null 2>&1; then
        digest=$(shasum -a 256 "$file" 2>/dev/null | awk '{print $1}') || digest=""
    else
        digest=$(cksum "$file" 2>/dev/null | awk '{print $1 "-" $2}') || digest=""
    fi
    printf '%s\n' "${digest:-unreadable}"
}

# Print one "<digest><TAB><path>" line for every regular file directly
# inside any loop directory of repo $1 (.humanize/rlcr/<loop>/<file>).
# Prints nothing when the repo has no loop directories.
_bg_allow_snapshot_loop_state() {
    local repo_dir="$1" file
    for file in "$repo_dir"/.humanize/rlcr/*/*; do
        if [[ -f "$file" ]]; then
            printf '%s\t%s\n' "$(_bg_allow_file_digest "$file")" "$file"
        fi
    done
    return 0
}

# ----------------------------------------------------------------------
# Invoke the stop hook with a crafted hook input JSON.
#   $1  repo_dir          project directory (cwd and CLAUDE_PROJECT_DIR)
#   $2  hook_input_json   JSON fed to the hook on stdin
#   $3  home_override     optional; exported as HOME for this invocation
#                         only, so tilde-path regressions can point at a
#                         fake HOME rooted under $TEST_DIR without leaking
#                         into the real user home
#   $4  lsof_bin_override optional; exported as LSOF_BIN for this
#                         invocation only (presumably selects the lsof
#                         binary the hook probes with -- confirm in hook)
# Sets RUN_EXIT_CODE, RUN_OUTPUT, RUN_MARKER.
# Also sets RUN_STATE_SNAPSHOT_REPO / RUN_STATE_SNAPSHOT: digests of the
# loop-state files taken BEFORE the hook runs, which
# assert_systemmessage_only uses as its "before" reference.
# ----------------------------------------------------------------------
run_stop_hook_with_input() {
    local repo_dir="$1" hook_input_json="$2" home_override="${3:-}" lsof_bin_override="${4:-}"

    RUN_MARKER="$repo_dir/codex-called.marker"
    rm -f "$RUN_MARKER"

    # Must happen before the hook is invoked: a digest taken afterwards
    # cannot detect a state change made by the hook.
    RUN_STATE_SNAPSHOT_REPO="$repo_dir"
    RUN_STATE_SNAPSHOT=$(_bg_allow_snapshot_loop_state "$repo_dir")

    set +e
    RUN_OUTPUT=$(
        cd "$repo_dir"
        [[ -n "$home_override" ]] && export HOME="$home_override"
        [[ -n "$lsof_bin_override" ]] && export LSOF_BIN="$lsof_bin_override"
        CLAUDE_PROJECT_DIR="$repo_dir" \
            MOCK_CODEX_MARKER="$RUN_MARKER" \
            MOCK_CODEX_OUTPUT="Mock review feedback" \
            "$STOP_HOOK" <<<"$hook_input_json" 2>&1
    )
    RUN_EXIT_CODE=$?
    set -e
}

# Assert that the last hook run short-circuited: exit 0, Codex not
# invoked, a non-empty .systemMessage in the JSON output, and the state
# file byte-for-byte unchanged by the hook.
#   $1  test name
#   $2  repo_dir passed to run_stop_hook_with_input
#   $3  state file that must be unchanged
#   $4  extended regex the systemMessage must match ("" skips the check)
#
# The "before" digest comes from the snapshot run_stop_hook_with_input
# took ahead of the hook. If no snapshot entry exists for $3 (different
# repo, or the hook was driven by another runner), the digest is taken
# here instead, which matches the previous behavior.
# NOTE(review): any other runner that feeds this assertion should refresh
# RUN_STATE_SNAPSHOT as well, otherwise the unchanged-state check is only
# nominal for that runner.
assert_systemmessage_only() {
    local test_name="$1" repo_dir="$2" state_file="$3" expected_count_regex="$4"

    local before_hash="" after_hash
    local snap_digest snap_path snapshot_hit=false
    if [[ "${RUN_STATE_SNAPSHOT_REPO:-}" == "$repo_dir" ]]; then
        while IFS=$'\t' read -r snap_digest snap_path; do
            if [[ "$snap_path" == "$state_file" ]]; then
                before_hash="$snap_digest"
                snapshot_hit=true
                break
            fi
        done <<< "${RUN_STATE_SNAPSHOT:-}"
    fi
    if [[ "$snapshot_hit" != "true" ]]; then
        before_hash=$(_bg_allow_file_digest "$state_file")
    fi

    if [[ "$RUN_EXIT_CODE" -ne 0 ]]; then
        fail "$test_name" "exit 0 with systemMessage" \
            "exit $RUN_EXIT_CODE; output: $RUN_OUTPUT"
        return
    fi
    if [[ -f "$RUN_MARKER" ]]; then
        fail "$test_name" "Codex NOT invoked" \
            "marker present (Codex was called); output: $RUN_OUTPUT"
        return
    fi
    local system_message
    system_message=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "")
    if [[ -z "$system_message" ]]; then
        fail "$test_name" "JSON output with systemMessage" \
            "no systemMessage in output: $RUN_OUTPUT"
        return
    fi
    if [[ -n "$expected_count_regex" ]]; then
        if ! printf '%s' "$system_message" | grep -Eq "$expected_count_regex"; then
            fail "$test_name" \
                "systemMessage matches /$expected_count_regex/" \
                "got: $system_message"
            return
        fi
    fi
    # A state file the hook deleted yields an empty digest here, which
    # differs from the snapshot digest and is reported as a change.
    after_hash=$(_bg_allow_file_digest "$state_file")
    if [[ "$before_hash" != "$after_hash" ]]; then
        fail "$test_name" "state file unchanged" \
            "hash changed ($before_hash -> $after_hash)"
        return
    fi
    pass "$test_name"
}

# Assert that the last hook run went through to the Codex review: exit 0
# and the mock codex left its invocation marker.
#   $1  test name
assert_reached_codex() {
    local test_name="$1"
    if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -f "$RUN_MARKER" ]]; then
        pass "$test_name"
    else
        fail "$test_name" "exit 0 and Codex invoked (marker present)" \
            "exit $RUN_EXIT_CODE, marker=$(test -f "$RUN_MARKER" && echo present || echo missing); output: $RUN_OUTPUT"
    fi
}
',"description":"x","prompt":"x"') +AC2_LINE_RESULT=$(emit_async_agent_launch_result "toolu_A" "agent_pending_A") +write_transcript "$AC2_TRANSCRIPT" "$AC2_LINE_LAUNCH" "$AC2_LINE_RESULT" + +AC2_INPUT=$(jq -c -n --arg tp "$AC2_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC2_REPO" "$AC2_INPUT" +assert_systemmessage_only \ + "AC-2: pending subagent triggers exit 0 + systemMessage, state untouched" \ + "$AC2_REPO" "$AC2_STATE" "1 background task" + +# ---------------- AC-3 ---------------- +echo "Test AC-3: One pending background shell -> exit 0 + systemMessage" +AC3_REPO="$TEST_DIR/ac3" +AC3_LOOP=$(create_full_fixture "$AC3_REPO") +AC3_STATE="$AC3_LOOP/state.md" +AC3_TRANSCRIPT="$TRANSCRIPTS_DIR/ac3.jsonl" +AC3_LINE_LAUNCH=$(emit_tool_use_assistant "toolu_B" "Bash" ',"command":"sleep 30"') +AC3_LINE_RESULT=$(emit_bg_shell_launch_result "toolu_B" "shell_pending_B") +write_transcript "$AC3_TRANSCRIPT" "$AC3_LINE_LAUNCH" "$AC3_LINE_RESULT" + +AC3_INPUT=$(jq -c -n --arg tp "$AC3_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC3_REPO" "$AC3_INPUT" +assert_systemmessage_only \ + "AC-3: pending background shell triggers exit 0 + systemMessage" \ + "$AC3_REPO" "$AC3_STATE" "1 background task" + +# ---------------- AC-4 ---------------- +echo "Test AC-4: Launched subagent with completion notification -> reaches Codex" +AC4_REPO="$TEST_DIR/ac4" +create_full_fixture "$AC4_REPO" > /dev/null +AC4_TRANSCRIPT="$TRANSCRIPTS_DIR/ac4.jsonl" +AC4_LAUNCH=$(emit_tool_use_assistant "toolu_C" "Agent" ',"description":"x","prompt":"x"') +AC4_RESULT=$(emit_async_agent_launch_result "toolu_C" "agent_done_C") +AC4_COMPLETE=$(emit_task_completion_event "agent_done_C" "toolu_C" "completed") +write_transcript "$AC4_TRANSCRIPT" "$AC4_LAUNCH" "$AC4_RESULT" "$AC4_COMPLETE" + +AC4_INPUT=$(jq -c -n --arg tp "$AC4_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC4_REPO" "$AC4_INPUT" +assert_reached_codex "AC-4: subagent with matching completion 
notification proceeds to Codex review" + +# ---------------- AC-5 ---------------- +echo "Test AC-5: 2 pending subagents + 1 pending shell -> systemMessage mentions 3" +AC5_REPO="$TEST_DIR/ac5" +AC5_LOOP=$(create_full_fixture "$AC5_REPO") +AC5_STATE="$AC5_LOOP/state.md" +AC5_TRANSCRIPT="$TRANSCRIPTS_DIR/ac5.jsonl" +AC5_L1_LAUNCH=$(emit_tool_use_assistant "toolu_D1" "Agent" ',"description":"x","prompt":"x"') +AC5_L1_RESULT=$(emit_async_agent_launch_result "toolu_D1" "agent_pending_D1") +AC5_L2_LAUNCH=$(emit_tool_use_assistant "toolu_D2" "Agent" ',"description":"y","prompt":"y"') +AC5_L2_RESULT=$(emit_async_agent_launch_result "toolu_D2" "agent_pending_D2") +AC5_L3_LAUNCH=$(emit_tool_use_assistant "toolu_D3" "Bash" ',"command":"sleep 30"') +AC5_L3_RESULT=$(emit_bg_shell_launch_result "toolu_D3" "shell_pending_D3") +write_transcript "$AC5_TRANSCRIPT" \ + "$AC5_L1_LAUNCH" "$AC5_L1_RESULT" \ + "$AC5_L2_LAUNCH" "$AC5_L2_RESULT" \ + "$AC5_L3_LAUNCH" "$AC5_L3_RESULT" + +AC5_INPUT=$(jq -c -n --arg tp "$AC5_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC5_REPO" "$AC5_INPUT" +assert_systemmessage_only \ + "AC-5: 2 pending subagents + 1 pending shell -> systemMessage mentions '3 background task(s)'" \ + "$AC5_REPO" "$AC5_STATE" "3 background task\\(s\\)" + +# ---------------- AC-6 ---------------- +echo "Test AC-6: missing transcript path -> reaches Codex (fail-closed)" +AC6_REPO="$TEST_DIR/ac6" +create_full_fixture "$AC6_REPO" > /dev/null +AC6_INPUT=$(jq -c -n --arg tp "/nonexistent/file-$$.jsonl" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC6_REPO" "$AC6_INPUT" +assert_reached_codex "AC-6: missing transcript_path proceeds to Codex review (fail-closed)" + +# Also: empty transcript_path field +AC6B_REPO="$TEST_DIR/ac6b" +create_full_fixture "$AC6B_REPO" > /dev/null +AC6B_INPUT='{"transcript_path":""}' +run_stop_hook_with_input "$AC6B_REPO" "$AC6B_INPUT" +assert_reached_codex "AC-6b: empty transcript_path string proceeds to Codex review" + +# And: 
no transcript_path key at all +AC6C_REPO="$TEST_DIR/ac6c" +create_full_fixture "$AC6C_REPO" > /dev/null +AC6C_INPUT='{}' +run_stop_hook_with_input "$AC6C_REPO" "$AC6C_INPUT" +assert_reached_codex "AC-6c: hook input with no transcript_path proceeds to Codex review" + +# ---------------- AC-7 ---------------- +echo "Test AC-7: No active loop -> exit 0, no systemMessage, no Codex" +AC7_REPO="$TEST_DIR/ac7" +create_empty_project "$AC7_REPO" +AC7_TRANSCRIPT="$TRANSCRIPTS_DIR/ac7.jsonl" +AC7_LAUNCH=$(emit_tool_use_assistant "toolu_E" "Agent" ',"description":"x","prompt":"x"') +AC7_RESULT=$(emit_async_agent_launch_result "toolu_E" "agent_pending_E") +write_transcript "$AC7_TRANSCRIPT" "$AC7_LAUNCH" "$AC7_RESULT" +AC7_INPUT=$(jq -c -n --arg tp "$AC7_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC7_REPO" "$AC7_INPUT" + +AC7_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! -f "$RUN_MARKER" ]] && [[ -z "$AC7_SYS_MSG" ]]; then + pass "AC-7: no active loop takes original exit-0 path without systemMessage" +else + fail "AC-7: no active loop takes original exit-0 path without systemMessage" \ + "exit 0, no Codex marker, no systemMessage" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$RUN_MARKER" && echo present || echo missing), systemMessage='$AC7_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# ---------------- AC-8 ---------------- +echo "Test AC-8: Finalize phase + pending bg -> exit 0 + systemMessage" +AC8_REPO="$TEST_DIR/ac8" +AC8_LOOP=$(create_full_fixture "$AC8_REPO" true) +AC8_STATE="$AC8_LOOP/finalize-state.md" +AC8_TRANSCRIPT="$TRANSCRIPTS_DIR/ac8.jsonl" +AC8_LAUNCH=$(emit_tool_use_assistant "toolu_F" "Agent" ',"description":"x","prompt":"x"') +AC8_RESULT=$(emit_async_agent_launch_result "toolu_F" "agent_pending_F") +write_transcript "$AC8_TRANSCRIPT" "$AC8_LAUNCH" "$AC8_RESULT" +AC8_INPUT=$(jq -c -n --arg tp "$AC8_TRANSCRIPT" '{transcript_path:$tp}') 
+run_stop_hook_with_input "$AC8_REPO" "$AC8_INPUT" +assert_systemmessage_only \ + "AC-8: finalize phase with pending bg task -> exit 0 + systemMessage" \ + "$AC8_REPO" "$AC8_STATE" "1 background task" + +# ---------------- AC-9 ---------------- +echo "Test AC-9: rlcr-stop-gate.sh forwards transcript_path to hook" +AC9_REPO="$TEST_DIR/ac9" +create_full_fixture "$AC9_REPO" > /dev/null +AC9_TRANSCRIPT="$TRANSCRIPTS_DIR/ac9.jsonl" +AC9_LAUNCH=$(emit_tool_use_assistant "toolu_G" "Agent" ',"description":"x","prompt":"x"') +AC9_RESULT=$(emit_async_agent_launch_result "toolu_G" "agent_pending_G") +write_transcript "$AC9_TRANSCRIPT" "$AC9_LAUNCH" "$AC9_RESULT" + +AC9_OUT="$AC9_REPO/gate-out.txt" +# Pass --project-root explicitly so an inherited CLAUDE_PROJECT_DIR +# from the outer runner cannot redirect the gate to the outer repo. +set +e +( + cd "$AC9_REPO" + "$GATE_SCRIPT" --project-root "$AC9_REPO" --transcript-path "$AC9_TRANSCRIPT" +) > "$AC9_OUT" 2>&1 +AC9_EXIT=$? +set -e + +if [[ "$AC9_EXIT" -eq 0 ]] && grep -q "^ALLOW:" "$AC9_OUT"; then + pass "AC-9: rlcr-stop-gate.sh exits 0 with ALLOW when bg tasks are pending" +else + AC9_BODY=$(cat "$AC9_OUT" 2>/dev/null || true) + fail "AC-9: rlcr-stop-gate.sh exits 0 with ALLOW when bg tasks are pending" \ + "exit 0 and output containing ALLOW:" \ + "exit $AC9_EXIT; output: $AC9_BODY" +fi + +# ---------------- AC-10 / AC-10b / AC-10c ---------------- +# Regression: real sessions pass transcript_path as "~/.claude/projects/...". +# Without tilde expansion the file check `[[ -f "~/..." ]]` is always false, +# so the short-circuit silently misses pending background tasks. +# +# The fixture lives under a fake HOME rooted inside $TEST_DIR so the tests +# remain portable on sandboxed or read-only-HOME environments. Only the +# specific hook / helper / wrapper invocations that need tilde expansion +# run with HOME=$FAKE_HOME; the rest of the suite keeps the real HOME. +echo "Test AC-10: '~/...' 
transcript path still triggers short-circuit" +AC10_REPO="$TEST_DIR/ac10" +AC10_LOOP=$(create_full_fixture "$AC10_REPO") +AC10_STATE="$AC10_LOOP/state.md" + +mkdir -p "$FAKE_HOME/session-data" +AC10_TRANSCRIPT="$FAKE_HOME/session-data/ac10.jsonl" +AC10_LAUNCH=$(emit_tool_use_assistant "toolu_H" "Agent" ',"description":"x","prompt":"x"') +AC10_RESULT=$(emit_async_agent_launch_result "toolu_H" "agent_pending_H") +write_transcript "$AC10_TRANSCRIPT" "$AC10_LAUNCH" "$AC10_RESULT" + +# Build the tilde-form string literally. Do NOT let the shell expand "~". +AC10_TILDE_PATH="~/session-data/ac10.jsonl" +AC10_INPUT=$(jq -c -n --arg tp "$AC10_TILDE_PATH" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC10_REPO" "$AC10_INPUT" "$FAKE_HOME" +assert_systemmessage_only \ + "AC-10: '~/'-prefixed transcript_path is expanded and short-circuits on pending bg" \ + "$AC10_REPO" "$AC10_STATE" "1 background task" + +# Also prove the helper works directly against a "~/..." argument under a +# fake HOME. Avoids masking a helper regression behind the hook's own +# normalization. +AC10_HELPER_OUT=$( + cd "$AC10_REPO" + HOME="$FAKE_HOME" + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC10_TILDE_PATH" 2>/dev/null | sort -u +) +if printf '%s\n' "$AC10_HELPER_OUT" | grep -qx 'agent_pending_H'; then + pass "AC-10b: list_pending_background_task_ids expands '~/...' directly" +else + fail "AC-10b: list_pending_background_task_ids expands '~/...' directly" \ + "output containing 'agent_pending_H'" "$AC10_HELPER_OUT" +fi + +# Verify the gate wrapper path with a tilde-form --transcript-path also +# reaches the short-circuit. AC-9 uses an absolute transcript path; this +# covers the same code path with a "~/..." form. +# +# Fresh fixture so the repo has no prior bg-pending.marker (AC-10 left +# one behind). 
The ambiguous-caller guard in the hook only silences the +# wrapper when a marker already exists; a clean repo falls through to +# the normal short-circuit so the systemMessage surfaces in the wrapper +# output. +echo "Test AC-10c: rlcr-stop-gate.sh with '~/...' --transcript-path -> ALLOW" +AC10C_REPO="$TEST_DIR/ac10c" +create_full_fixture "$AC10C_REPO" > /dev/null +mkdir -p "$FAKE_HOME/session-data-c" +AC10C_TRANSCRIPT="$FAKE_HOME/session-data-c/ac10c.jsonl" +AC10C_LAUNCH=$(emit_tool_use_assistant "toolu_H2" "Agent" ',"description":"x","prompt":"x"') +AC10C_RESULT=$(emit_async_agent_launch_result "toolu_H2" "agent_pending_H2") +write_transcript "$AC10C_TRANSCRIPT" "$AC10C_LAUNCH" "$AC10C_RESULT" +AC10C_TILDE_PATH="~/session-data-c/ac10c.jsonl" + +AC10C_OUT="$TEST_DIR/ac10c-out.txt" +set +e +( + cd "$AC10C_REPO" + HOME="$FAKE_HOME" "$GATE_SCRIPT" \ + --project-root "$AC10C_REPO" \ + --transcript-path "$AC10C_TILDE_PATH" +) > "$AC10C_OUT" 2>&1 +AC10C_EXIT=$? +set -e + +if [[ "$AC10C_EXIT" -eq 0 ]] \ + && grep -q "^ALLOW:" "$AC10C_OUT" \ + && grep -q "background task" "$AC10C_OUT"; then + pass "AC-10c: rlcr-stop-gate.sh expands '~/...' and emits ALLOW with systemMessage" +else + AC10C_BODY=$(cat "$AC10C_OUT" 2>/dev/null || true) + fail "AC-10c: rlcr-stop-gate.sh expands '~/...' and emits ALLOW with systemMessage" \ + "exit 0 + output containing ALLOW: and 'background task'" \ + "exit $AC10C_EXIT; output: $AC10C_BODY" +fi + +# ---------------- AC-11 / AC-11b ---------------- +# Cross-session parked-loop guard: when a loop in the repo carries the +# bg-pending.marker and its stored session_id does not match the caller, +# the stop hook must exit 0 with a dedicated "parked by another session" +# systemMessage and leave every on-disk artifact intact. The current +# session has no authority to advance or cleanup a foreign parked loop +# because its transcript cannot observe the other session's bg task. 
+echo "Test AC-11: cross-session bg-pending.marker emits 'parked' systemMessage" +AC11_REPO="$TEST_DIR/ac11" +AC11_LOOP=$(create_full_fixture "$AC11_REPO") +AC11_STATE="$AC11_LOOP/state.md" +AC11_MARKER="$AC11_LOOP/bg-pending.marker" + +# Override state.md with an explicit stored session_id so find_active_loop +# sees a real mismatch when we later pass a different session_id. +AC11_BRANCH=$(git -C "$AC11_REPO" rev-parse --abbrev-ref HEAD) +AC11_BASE_COMMIT=$(git -C "$AC11_REPO" rev-parse HEAD) +cat > "$AC11_STATE" <<EOF_AC11 +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC11_BRANCH +base_branch: $AC11_BRANCH +base_commit: $AC11_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_alpha +--- +EOF_AC11 +AC11_STATE_HASH_BEFORE=$(sha256sum "$AC11_STATE" | awk '{print $1}') + +# Simulate the state left by a previous session that took the short-circuit. +: > "$AC11_MARKER" + +AC11_TRANSCRIPT="$TRANSCRIPTS_DIR/ac11.jsonl" +AC11_LAUNCH=$(emit_tool_use_assistant "toolu_I" "Agent" ',"description":"x","prompt":"x"') +AC11_RESULT=$(emit_async_agent_launch_result "toolu_I" "agent_pending_I") +write_transcript "$AC11_TRANSCRIPT" "$AC11_LAUNCH" "$AC11_RESULT" + +AC11_INPUT=$(jq -c -n --arg tp "$AC11_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_beta"}') +run_stop_hook_with_input "$AC11_REPO" "$AC11_INPUT" +AC11_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +AC11_STATE_HASH_AFTER=$(sha256sum "$AC11_STATE" | awk '{print $1}') +if [[ "$RUN_EXIT_CODE" -eq 0 ]] \ + && [[ ! 
-f "$RUN_MARKER" ]] \ + && [[ -f "$AC11_MARKER" ]] \ + && [[ "$AC11_STATE_HASH_BEFORE" == "$AC11_STATE_HASH_AFTER" ]] \ + && printf '%s' "$AC11_SYS_MSG" | grep -qi "parked"; then + pass "AC-11: cross-session stop exits with 'parked' systemMessage; marker and session_id untouched" +else + fail "AC-11: cross-session stop exits with 'parked' systemMessage; marker and session_id untouched" \ + "exit 0 + systemMessage matches /parked/ + marker stays + state.md byte-identical + no Codex" \ + "exit $RUN_EXIT_CODE, codex_marker=$(test -f "$RUN_MARKER" && echo present || echo missing), bg_marker=$(test -f "$AC11_MARKER" && echo present || echo missing), state_unchanged=$([[ "$AC11_STATE_HASH_BEFORE" == "$AC11_STATE_HASH_AFTER" ]] && echo yes || echo no), systemMessage='$AC11_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# Negative counterpart: same session mismatch but NO marker must still +# reject the loop (preserving the existing session-bound isolation when +# the loop was not explicitly parked). +echo "Test AC-11b: cross-session without marker is still rejected" +AC11B_REPO="$TEST_DIR/ac11b" +AC11B_LOOP=$(create_full_fixture "$AC11B_REPO") +AC11B_STATE="$AC11B_LOOP/state.md" +AC11B_BRANCH=$(git -C "$AC11B_REPO" rev-parse --abbrev-ref HEAD) +AC11B_BASE_COMMIT=$(git -C "$AC11B_REPO" rev-parse HEAD) +cat > "$AC11B_STATE" <<EOF_AC11B +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC11B_BRANCH +base_branch: $AC11B_BRANCH +base_commit: $AC11B_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_alpha +--- +EOF_AC11B +# Intentionally NO marker in AC11B_LOOP. 
+ +AC11B_TRANSCRIPT="$TRANSCRIPTS_DIR/ac11b.jsonl" +AC11B_LAUNCH=$(emit_tool_use_assistant "toolu_J" "Agent" ',"description":"x","prompt":"x"') +AC11B_RESULT=$(emit_async_agent_launch_result "toolu_J" "agent_pending_J") +write_transcript "$AC11B_TRANSCRIPT" "$AC11B_LAUNCH" "$AC11B_RESULT" + +AC11B_INPUT=$(jq -c -n --arg tp "$AC11B_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_beta"}') +run_stop_hook_with_input "$AC11B_REPO" "$AC11B_INPUT" +AC11B_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! -f "$RUN_MARKER" ]] && [[ -z "$AC11B_SYS_MSG" ]]; then + pass "AC-11b: cross-session without marker keeps existing isolation (no adoption)" +else + fail "AC-11b: cross-session without marker keeps existing isolation (no adoption)" \ + "exit 0, no Codex marker, no systemMessage" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$RUN_MARKER" && echo present || echo missing), systemMessage='$AC11B_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# AC-11c: short-circuit should actually write bg-pending.marker so the +# adoption path in AC-11 is reachable from real usage (not only from +# synthetic test setup). 
+echo "Test AC-11c: short-circuit writes bg-pending.marker" +AC11C_REPO="$TEST_DIR/ac11c" +AC11C_LOOP=$(create_full_fixture "$AC11C_REPO") +AC11C_MARKER="$AC11C_LOOP/bg-pending.marker" +[[ -e "$AC11C_MARKER" ]] && rm -f "$AC11C_MARKER" + +AC11C_TRANSCRIPT="$TRANSCRIPTS_DIR/ac11c.jsonl" +AC11C_LAUNCH=$(emit_tool_use_assistant "toolu_K" "Agent" ',"description":"x","prompt":"x"') +AC11C_RESULT=$(emit_async_agent_launch_result "toolu_K" "agent_pending_K") +write_transcript "$AC11C_TRANSCRIPT" "$AC11C_LAUNCH" "$AC11C_RESULT" + +AC11C_INPUT=$(jq -c -n --arg tp "$AC11C_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC11C_REPO" "$AC11C_INPUT" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -f "$AC11C_MARKER" ]]; then + pass "AC-11c: short-circuit path writes bg-pending.marker into loop dir" +else + fail "AC-11c: short-circuit path writes bg-pending.marker into loop dir" \ + "exit 0 and bg-pending.marker present" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$AC11C_MARKER" && echo present || echo missing); output: $RUN_OUTPUT" +fi + +# ---------------- AC-12 ---------------- +# Session isolation under multiple concurrent RLCR loops: when the caller's +# own exact-match dir exists in the listing, find_active_loop must return +# it even if a newer sibling dir (belonging to another session) also has a +# bg-pending.marker. The marker fallback is only for orphan recovery when +# no exact match exists. 
+echo "Test AC-12: find_active_loop prefers exact session match over marker" +AC12_BASE="$TEST_DIR/ac12-loops" +mkdir -p "$AC12_BASE/2026-03-02_00-00-00" +mkdir -p "$AC12_BASE/2026-03-01_00-00-00" + +cat > "$AC12_BASE/2026-03-02_00-00-00/state.md" <<'EOF_AC12_NEWER' +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +session_id: session_foreign +--- +EOF_AC12_NEWER +: > "$AC12_BASE/2026-03-02_00-00-00/bg-pending.marker" + +cat > "$AC12_BASE/2026-03-01_00-00-00/state.md" <<'EOF_AC12_OLDER' +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +session_id: session_home +--- +EOF_AC12_OLDER + +AC12_RESULT=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + find_active_loop "$AC12_BASE" "session_home" +) +if [[ "$AC12_RESULT" == "$AC12_BASE/2026-03-01_00-00-00" ]]; then + pass "AC-12: find_active_loop returns older exact-match dir over newer marker dir" +else + fail "AC-12: find_active_loop returns older exact-match dir over newer marker dir" \ + "$AC12_BASE/2026-03-01_00-00-00" "$AC12_RESULT" +fi + +if [[ -f "$AC12_BASE/2026-03-02_00-00-00/bg-pending.marker" ]]; then + pass "AC-12b: foreign session's marker untouched by find_active_loop scan" +else + fail "AC-12b: foreign session's marker untouched by find_active_loop scan" \ + "newer dir marker still present" "marker was removed" +fi + +# ---------------- AC-13 ---------------- +# Same-session resume after background completion: a stale marker from the +# previous short-circuit must be cleaned up on the next stop where no bg is +# pending. State.md session_id stays put because it already matches. 
+echo "Test AC-13: same-session resume removes stale bg-pending.marker" +AC13_REPO="$TEST_DIR/ac13" +AC13_LOOP=$(create_full_fixture "$AC13_REPO") +AC13_STATE="$AC13_LOOP/state.md" +AC13_BRANCH=$(git -C "$AC13_REPO" rev-parse --abbrev-ref HEAD) +AC13_BASE_COMMIT=$(git -C "$AC13_REPO" rev-parse HEAD) +cat > "$AC13_STATE" <<EOF_AC13 +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC13_BRANCH +base_branch: $AC13_BRANCH +base_commit: $AC13_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_home +--- +EOF_AC13 +: > "$AC13_LOOP/bg-pending.marker" + +AC13_TRANSCRIPT="$TRANSCRIPTS_DIR/ac13.jsonl" +write_transcript "$AC13_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' +AC13_INPUT=$(jq -c -n --arg tp "$AC13_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC13_REPO" "$AC13_INPUT" + +if [[ ! -f "$AC13_LOOP/bg-pending.marker" ]]; then + pass "AC-13: marker removed on non-short-circuit resume (same session)" +else + fail "AC-13: marker removed on non-short-circuit resume (same session)" \ + "marker absent" "marker still present" +fi + +if grep -q "^session_id: session_home$" "$AC13_STATE"; then + pass "AC-13b: same-session resume leaves state.md session_id unchanged" +else + fail "AC-13b: same-session resume leaves state.md session_id unchanged" \ + "session_id: session_home" "$(grep '^session_id:' "$AC13_STATE" || echo '(missing)')" +fi + +# ---------------- AC-14 ---------------- +# Anti-hijack: a different session walking in MUST NOT rewrite the stored +# session_id and MUST NOT delete bg-pending.marker, even when its own +# transcript shows no pending bg events. 
The foreign session's transcript +# cannot observe the parking session's bg activity, so nothing the new +# session sees is authoritative. The cross-session guard takes over +# instead. +echo "Test AC-14: cross-session stop preserves marker and stored session_id" +AC14_REPO="$TEST_DIR/ac14" +AC14_LOOP=$(create_full_fixture "$AC14_REPO") +AC14_STATE="$AC14_LOOP/state.md" +AC14_MARKER="$AC14_LOOP/bg-pending.marker" +AC14_BRANCH=$(git -C "$AC14_REPO" rev-parse --abbrev-ref HEAD) +AC14_BASE_COMMIT=$(git -C "$AC14_REPO" rev-parse HEAD) +cat > "$AC14_STATE" <<EOF_AC14 +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC14_BRANCH +base_branch: $AC14_BRANCH +base_commit: $AC14_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_foreign +--- +EOF_AC14 +: > "$AC14_MARKER" + +AC14_TRANSCRIPT="$TRANSCRIPTS_DIR/ac14.jsonl" +write_transcript "$AC14_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' +AC14_INPUT=$(jq -c -n --arg tp "$AC14_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC14_REPO" "$AC14_INPUT" + +if [[ -f "$AC14_MARKER" ]]; then + pass "AC-14: cross-session stop preserves bg-pending.marker" +else + fail "AC-14: cross-session stop preserves bg-pending.marker" \ + "marker still present" "marker was removed (foreign-session hijack)" +fi + +if grep -q "^session_id: session_foreign$" "$AC14_STATE"; then + pass "AC-14b: cross-session stop leaves stored session_id intact" +else + fail "AC-14b: cross-session stop leaves stored session_id intact" \ + "session_id: session_foreign" "$(grep '^session_id:' "$AC14_STATE" || echo '(missing)')" +fi + +# ---------------- AC-15 ---------------- +# Completion recognition: the current Claude Code transcript format emits +# background-task 
completion as +# type: "system", subtype: "task_notification", task_id: "..." +# The helper must recognise this form (not only the legacy queue-operation +# XML block) or launched tasks will stay "pending" forever. +echo "Test AC-15: task_notification system records mark launches completed" +AC15_TRANSCRIPT="$TRANSCRIPTS_DIR/ac15.jsonl" +AC15_LAUNCH=$(emit_tool_use_assistant "toolu_L" "Agent" ',"description":"x","prompt":"x"') +AC15_RESULT=$(emit_async_agent_launch_result "toolu_L" "agent_done_L") +AC15_NOTIF=$(emit_sdk_task_notification "agent_done_L" "toolu_L" "completed") +write_transcript "$AC15_TRANSCRIPT" "$AC15_LAUNCH" "$AC15_RESULT" "$AC15_NOTIF" + +AC15_PENDING=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC15_TRANSCRIPT" 2>/dev/null +) +if [[ -z "$AC15_PENDING" ]]; then + pass "AC-15: task_notification completion removes the matching launch from pending" +else + fail "AC-15: task_notification completion removes the matching launch from pending" \ + "empty pending list" "got: $AC15_PENDING" +fi + +# ---------------- AC-16 ---------------- +# Completion recognition mixed formats: two launches, one completed via the +# legacy queue-operation XML block, the other via the current +# system/task_notification record. Union of both sources must resolve to +# an empty pending set. 
+echo "Test AC-16: helper unions legacy queue-operation and task_notification completions" +AC16_TRANSCRIPT="$TRANSCRIPTS_DIR/ac16.jsonl" +AC16_L1=$(emit_tool_use_assistant "toolu_M1" "Agent" ',"description":"x","prompt":"x"') +AC16_R1=$(emit_async_agent_launch_result "toolu_M1" "agent_legacy_M1") +AC16_C1=$(emit_task_completion_event "agent_legacy_M1" "toolu_M1" "completed") +AC16_L2=$(emit_tool_use_assistant "toolu_M2" "Agent" ',"description":"y","prompt":"y"') +AC16_R2=$(emit_async_agent_launch_result "toolu_M2" "agent_sdk_M2") +AC16_C2=$(emit_sdk_task_notification "agent_sdk_M2" "toolu_M2" "completed") +write_transcript "$AC16_TRANSCRIPT" \ + "$AC16_L1" "$AC16_R1" "$AC16_C1" \ + "$AC16_L2" "$AC16_R2" "$AC16_C2" + +AC16_PENDING=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC16_TRANSCRIPT" 2>/dev/null +) +if [[ -z "$AC16_PENDING" ]]; then + pass "AC-16: mixed legacy+SDK completion records resolve to empty pending set" +else + fail "AC-16: mixed legacy+SDK completion records resolve to empty pending set" \ + "empty pending list" "got: $AC16_PENDING" +fi + +# ---------------- AC-17 ---------------- +# Marker preservation when completion cannot be verified: if +# transcript_path is missing or unreadable, has_pending_background_tasks +# fails closed (returns no pending). The non-short-circuit cleanup must NOT +# erase bg-pending.marker or rewrite session_id in that case, because the +# cross-session recovery signal is still needed. 
+echo "Test AC-17: missing transcript preserves bg-pending.marker and session_id" +AC17_REPO="$TEST_DIR/ac17" +AC17_LOOP=$(create_full_fixture "$AC17_REPO") +AC17_STATE="$AC17_LOOP/state.md" +AC17_BRANCH=$(git -C "$AC17_REPO" rev-parse --abbrev-ref HEAD) +AC17_BASE_COMMIT=$(git -C "$AC17_REPO" rev-parse HEAD) +cat > "$AC17_STATE" <<EOF_AC17 +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC17_BRANCH +base_branch: $AC17_BRANCH +base_commit: $AC17_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_foreign +--- +EOF_AC17 +: > "$AC17_LOOP/bg-pending.marker" + +# Hook input has NO transcript_path -> has_pending_background_tasks is +# fail-closed; cleanup path must leave marker and session_id intact. +AC17_INPUT='{"session_id":"session_home"}' +run_stop_hook_with_input "$AC17_REPO" "$AC17_INPUT" + +if [[ -f "$AC17_LOOP/bg-pending.marker" ]]; then + pass "AC-17: unreadable transcript preserves bg-pending.marker" +else + fail "AC-17: unreadable transcript preserves bg-pending.marker" \ + "marker still present" "marker was removed" +fi + +if grep -q "^session_id: session_foreign$" "$AC17_STATE"; then + pass "AC-17b: unreadable transcript leaves stored session_id untouched" +else + fail "AC-17b: unreadable transcript leaves stored session_id untouched" \ + "session_id: session_foreign" "$(grep '^session_id:' "$AC17_STATE" || echo '(missing)')" +fi + +# AC-17c: transcript_path is provided but points at a non-existent file +# (equally unreadable). Same guarantee: marker + stored session_id +# preserved. 
+echo "Test AC-17c: transcript_path pointing at non-existent file preserves marker" +AC17C_REPO="$TEST_DIR/ac17c" +AC17C_LOOP=$(create_full_fixture "$AC17C_REPO") +AC17C_STATE="$AC17C_LOOP/state.md" +AC17C_BRANCH=$(git -C "$AC17C_REPO" rev-parse --abbrev-ref HEAD) +AC17C_BASE_COMMIT=$(git -C "$AC17C_REPO" rev-parse HEAD) +cat > "$AC17C_STATE" <<EOF_AC17C +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC17C_BRANCH +base_branch: $AC17C_BRANCH +base_commit: $AC17C_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_foreign +--- +EOF_AC17C +: > "$AC17C_LOOP/bg-pending.marker" + +AC17C_INPUT=$(jq -c -n --arg tp "$TRANSCRIPTS_DIR/never-written.jsonl" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC17C_REPO" "$AC17C_INPUT" + +if [[ -f "$AC17C_LOOP/bg-pending.marker" ]] \ + && grep -q "^session_id: session_foreign$" "$AC17C_STATE"; then + pass "AC-17c: missing-file transcript_path preserves marker and session_id" +else + fail "AC-17c: missing-file transcript_path preserves marker and session_id" \ + "marker present and session_id: session_foreign" \ + "marker=$(test -f "$AC17C_LOOP/bg-pending.marker" && echo present || echo missing); session_id=$(grep '^session_id:' "$AC17C_STATE" || echo '(missing)')" +fi + +# ---------------- AC-18 ---------------- +# Validator isolation: find_active_loop's marker-based adoption is opt-in +# via its third positional argument. Default callers (read/write/bash/etc. +# validators) must continue to see strict session-id isolation; a parked +# loop for a different session must NOT become visible to them through a +# bg-pending.marker. 
+echo "Test AC-18: find_active_loop default invocation ignores foreign marker" +AC18_BASE="$TEST_DIR/ac18-loops" +mkdir -p "$AC18_BASE/2026-03-02_00-00-00" +cat > "$AC18_BASE/2026-03-02_00-00-00/state.md" <<'EOF_AC18' +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +session_id: session_foreign +--- +EOF_AC18 +: > "$AC18_BASE/2026-03-02_00-00-00/bg-pending.marker" + +AC18_DEFAULT=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + find_active_loop "$AC18_BASE" "session_home" +) +if [[ -z "$AC18_DEFAULT" ]]; then + pass "AC-18: find_active_loop default (no opt-in) ignores foreign marker dir" +else + fail "AC-18: find_active_loop default (no opt-in) ignores foreign marker dir" \ + "empty result (validators stay isolated)" "got: $AC18_DEFAULT" +fi + +AC18_OPTIN=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + find_active_loop "$AC18_BASE" "session_home" true +) +if [[ "$AC18_OPTIN" == "$AC18_BASE/2026-03-02_00-00-00" ]]; then + pass "AC-18b: find_active_loop with opt-in does return the marker dir" +else + fail "AC-18b: find_active_loop with opt-in does return the marker dir" \ + "$AC18_BASE/2026-03-02_00-00-00" "$AC18_OPTIN" +fi + +# ---------------- AC-19 ---------------- +# Empty-session caller + bg-pending.marker present: the caller might be +# the parked loop's owner invoking through a wrapper that didn't forward +# session_id, OR it might be a different session. The hook cannot tell +# them apart from the input, so the safe response is `exit 0` silently +# with no systemMessage and no on-disk mutation. The real Claude stop +# hook (which always has session_id populated) drives actual parking and +# cleanup. 
+echo "Test AC-19: ambiguous caller (empty session_id + marker) exits silently" +AC19_REPO="$TEST_DIR/ac19" +AC19_LOOP=$(create_full_fixture "$AC19_REPO") +AC19_STATE="$AC19_LOOP/state.md" +AC19_MARKER="$AC19_LOOP/bg-pending.marker" +AC19_BRANCH=$(git -C "$AC19_REPO" rev-parse --abbrev-ref HEAD) +AC19_BASE_COMMIT=$(git -C "$AC19_REPO" rev-parse HEAD) +cat > "$AC19_STATE" <<EOF_AC19 +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC19_BRANCH +base_branch: $AC19_BRANCH +base_commit: $AC19_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_alpha +--- +EOF_AC19 +AC19_STATE_HASH_BEFORE=$(sha256sum "$AC19_STATE" | awk '{print $1}') +: > "$AC19_MARKER" + +AC19_TRANSCRIPT="$TRANSCRIPTS_DIR/ac19.jsonl" +write_transcript "$AC19_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' + +# Hook input without any session_id key (mirrors rlcr-stop-gate.sh +# invoked without --session-id). +AC19_INPUT=$(jq -c -n --arg tp "$AC19_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC19_REPO" "$AC19_INPUT" +AC19_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +AC19_STATE_HASH_AFTER=$(sha256sum "$AC19_STATE" | awk '{print $1}') +if [[ "$RUN_EXIT_CODE" -eq 0 ]] \ + && [[ ! 
-f "$RUN_MARKER" ]] \ + && [[ -f "$AC19_MARKER" ]] \ + && [[ "$AC19_STATE_HASH_BEFORE" == "$AC19_STATE_HASH_AFTER" ]] \ + && [[ -z "$AC19_SYS_MSG" ]]; then + pass "AC-19: ambiguous caller exits silently; marker and state.md preserved" +else + fail "AC-19: ambiguous caller exits silently; marker and state.md preserved" \ + "exit 0 + no systemMessage + marker stays + state.md byte-identical + no Codex" \ + "exit $RUN_EXIT_CODE, codex_marker=$(test -f "$RUN_MARKER" && echo present || echo missing), bg_marker=$(test -f "$AC19_MARKER" && echo present || echo missing), state_unchanged=$([[ "$AC19_STATE_HASH_BEFORE" == "$AC19_STATE_HASH_AFTER" ]] && echo yes || echo no), systemMessage='$AC19_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# ---------------- AC-20 ---------------- +# Non-short-circuit cleanup must not drop bg-pending.marker when the +# transcript exists but cannot be parsed. The helper is fail-closed on +# malformed JSON; that failure must NOT be treated as "no pending". +echo "Test AC-20: malformed transcript preserves bg-pending.marker" +AC20_REPO="$TEST_DIR/ac20" +AC20_LOOP=$(create_full_fixture "$AC20_REPO") +AC20_STATE="$AC20_LOOP/state.md" +AC20_MARKER="$AC20_LOOP/bg-pending.marker" +AC20_BRANCH=$(git -C "$AC20_REPO" rev-parse --abbrev-ref HEAD) +AC20_BASE_COMMIT=$(git -C "$AC20_REPO" rev-parse HEAD) +cat > "$AC20_STATE" <<EOF_AC20 +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.5 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $AC20_BRANCH +base_branch: $AC20_BRANCH +base_commit: $AC20_BASE_COMMIT +review_started: false +ask_codex_question: false +agent_teams: false +session_id: session_home +--- +EOF_AC20 +: > "$AC20_MARKER" + +# Write a deliberately malformed transcript (truncated JSON object) so +# list_pending_background_task_ids's jq invocations fail the parse. 
+AC20_TRANSCRIPT="$TRANSCRIPTS_DIR/ac20.jsonl"
+echo '{"type":"user","message":' > "$AC20_TRANSCRIPT"
+
+AC20_INPUT=$(jq -cn --arg tp "$AC20_TRANSCRIPT" --arg sid "session_home" \
+    '{transcript_path: $tp, session_id: $sid}')
+run_stop_hook_with_input "$AC20_REPO" "$AC20_INPUT"
+
+if [[ ! -f "$AC20_MARKER" ]]; then
+    fail "AC-20: malformed transcript preserves bg-pending.marker" \
+        "marker still present (cleanup must not fire on fail-closed helper)" \
+        "marker was removed"
+else
+    pass "AC-20: malformed transcript preserves bg-pending.marker"
+fi
+
+# ---------------- AC-21 ----------------
+# Transcript scan boundary. A Claude transcript covers the whole session,
+# so it may hold background launches made before the RLCR loop began.
+# The helper keeps only launch events with `.timestamp >= since_ts`
+# (since_ts is derived from the loop dir basename), so launches that
+# predate the loop never count as pending.
+echo "Test AC-21: pre-loop launches are filtered out by since_ts"
+AC21_TRANSCRIPT="$TRANSCRIPTS_DIR/ac21.jsonl"
+
+# Fixtures across this suite use 2026-03-01 00:00:00 as the loop
+# boundary. Two launches are built: one BEFORE the boundary (expected to
+# be filtered out) and one AFTER it (expected to remain pending).
+AC21_PRE_LAUNCH=$(
+    jq -cn \
+        --arg ts "2026-02-28T10:00:00.000Z" --arg id "agent_pre_loop" \
+        '{type: "user", timestamp: $ts, toolUseResult: {isAsync: true, agentId: $id}}'
+)
+AC21_POST_LAUNCH=$(
+    jq -cn \
+        --arg ts "2026-03-01T10:00:00.000Z" --arg id "agent_in_loop" \
+        '{type: "user", timestamp: $ts, toolUseResult: {isAsync: true, agentId: $id}}'
+)
+write_transcript "$AC21_TRANSCRIPT" "$AC21_PRE_LAUNCH" "$AC21_POST_LAUNCH"
+
+AC21_SINCE="2026-03-01T00:00:00.000Z"
+AC21_FILTERED=$(
+    # shellcheck source=/dev/null
+    source "$PROJECT_ROOT/hooks/lib/loop-common.sh"
+    list_pending_background_task_ids "$AC21_TRANSCRIPT" "$AC21_SINCE" 2>/dev/null | sort -u
+)
+if [[ "$AC21_FILTERED" != "agent_in_loop" ]]; then
+    fail "AC-21: list_pending_background_task_ids filters launches before since_ts" \
+        "only 'agent_in_loop' (pre-loop launch excluded)" "got: $AC21_FILTERED"
+else
+    pass "AC-21: list_pending_background_task_ids filters launches before since_ts"
+fi
+
+# AC-21b: with TZ=UTC the local wall clock equals UTC, so the derive
+# helper must return the dir timestamp unchanged in ISO-8601 form.
+AC21B_DERIVED=$(
+    # shellcheck source=/dev/null
+    source "$PROJECT_ROOT/hooks/lib/loop-common.sh"
+    export TZ="UTC"
+    derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_00-00-00"
+)
+if [[ "$AC21B_DERIVED" != "2026-03-01T00:00:00.000Z" ]]; then
+    fail "AC-21b: derive_loop_start_iso_ts under TZ=UTC preserves the wall-clock" \
+        "2026-03-01T00:00:00.000Z" "$AC21B_DERIVED"
+else
+    pass "AC-21b: derive_loop_start_iso_ts under TZ=UTC preserves the wall-clock"
+fi
+
+# AC-21d: the loop dir is named by setup-rlcr-loop.sh from the local wall
+# clock, so under a non-UTC TZ the boundary must be shifted to real UTC.
+# JST (UTC+9) example: 09:00 JST == 00:00 UTC.
+AC21D_DERIVED=$(
+    # shellcheck source=/dev/null
+    source "$PROJECT_ROOT/hooks/lib/loop-common.sh"
+    export TZ="Asia/Tokyo"
+    derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_09-00-00"
+)
+if [[ "$AC21D_DERIVED" != "2026-03-01T00:00:00.000Z" ]]; then
+    fail "AC-21d: derive_loop_start_iso_ts converts JST wall-clock to correct UTC" \
+        "2026-03-01T00:00:00.000Z (9am JST = 0am UTC)" "$AC21D_DERIVED"
+else
+    pass "AC-21d: derive_loop_start_iso_ts converts JST wall-clock to correct UTC"
+fi
+
+# AC-21e: PST (UTC-8) example. March 1 is used because it is still PST
+# (DST does not begin until March 8, 2026), giving a fixed -8h offset:
+# 00:00 PST == 08:00 UTC.
+AC21E_DERIVED=$(
+    # shellcheck source=/dev/null
+    source "$PROJECT_ROOT/hooks/lib/loop-common.sh"
+    export TZ="America/Los_Angeles"
+    derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_00-00-00"
+)
+if [[ "$AC21E_DERIVED" != "2026-03-01T08:00:00.000Z" ]]; then
+    fail "AC-21e: derive_loop_start_iso_ts converts PST wall-clock to correct UTC" \
+        "2026-03-01T08:00:00.000Z (0am PST = 8am UTC before DST)" "$AC21E_DERIVED"
+else
+    pass "AC-21e: derive_loop_start_iso_ts converts PST wall-clock to correct UTC"
+fi
+
+# AC-21c: end-to-end through the stop hook. The transcript holds only a
+# pre-loop launch, so the hook must NOT short-circuit for this loop.
+echo "Test AC-21c: stop hook ignores pre-loop launches for this loop"
+AC21C_REPO="$TEST_DIR/ac21c"
+AC21C_LOOP=$(create_full_fixture "$AC21C_REPO")
+AC21C_MARKER="$AC21C_LOOP/bg-pending.marker"
+AC21C_TRANSCRIPT="$TRANSCRIPTS_DIR/ac21c.jsonl"
+write_transcript "$AC21C_TRANSCRIPT" "$AC21_PRE_LAUNCH"
+AC21C_INPUT=$(jq -cn --arg tp "$AC21C_TRANSCRIPT" --arg sid "session_home" \
+    '{transcript_path: $tp, session_id: $sid}')
+run_stop_hook_with_input "$AC21C_REPO" "$AC21C_INPUT"
+
+# Once the pre-loop launch is filtered out nothing in-loop is pending:
+# no short-circuit, no bg-pending.marker, and the hook continues into
+# its normal flow (which calls Codex in this fixture).
+if [[ ! -f "$AC21C_MARKER" && -f "$RUN_MARKER" ]]; then
+    pass "AC-21c: pre-loop launch does not write bg-pending.marker; Codex runs"
+else
+    fail "AC-21c: pre-loop launch does not write bg-pending.marker; Codex runs" \
+        "no bg marker AND Codex invoked" \
+        "bg_marker=$([[ -f "$AC21C_MARKER" ]] && echo present || echo missing); codex_marker=$([[ -f "$RUN_MARKER" ]] && echo present || echo missing)"
+fi
+
+# ---------------- AC-22 ----------------
+# Wrapper invoked without --session-id on a repo with NO marker: it
+# should act like the normal same-session path, i.e. a pending bg in
+# the transcript writes the marker and the wrapper output surfaces the
+# "background task" systemMessage. This shows the ambiguous-caller
+# guard fires only on a pre-existing marker, not on every no-session
+# call.
+echo "Test AC-22: wrapper without session_id, no prior marker, pending bg -> ALLOW with systemMessage"
+AC22_REPO="$TEST_DIR/ac22"
+# Use the loop dir reported by the fixture helper (as sibling tests do) instead of hard-coding its name.
+AC22_LOOP=$(create_full_fixture "$AC22_REPO")
+AC22_MARKER="$AC22_LOOP/bg-pending.marker"
+AC22_TRANSCRIPT="$TRANSCRIPTS_DIR/ac22.jsonl"
+AC22_LAUNCH=$(jq -c -n '{
+    type:"user",
+    timestamp:"2026-03-01T10:00:00.000Z",
+    toolUseResult:{isAsync:true, agentId:"agent_wrapper_pending"}
+}')
+write_transcript "$AC22_TRANSCRIPT" "$AC22_LAUNCH"
+
+AC22_OUT="$TEST_DIR/ac22-out.txt"
+set +e  # the gate's exit status is asserted below, so it must not abort the script
+(
+    cd "$AC22_REPO" || exit 1  # never run the gate from the wrong directory
+    "$GATE_SCRIPT" --project-root "$AC22_REPO" --transcript-path "$AC22_TRANSCRIPT"
+) > "$AC22_OUT" 2>&1
+AC22_EXIT=$?
+set -e
+
+if [[ "$AC22_EXIT" -eq 0 ]] \
+    && grep -q "^ALLOW:" "$AC22_OUT" \
+    && grep -q "background task" "$AC22_OUT" \
+    && [[ -f "$AC22_MARKER" ]]; then
+    pass "AC-22: wrapper without session_id + no prior marker + pending bg -> writes marker, surfaces systemMessage"
+else
+    AC22_BODY=$(cat "$AC22_OUT" 2>/dev/null || true)
+    fail "AC-22: wrapper without session_id + no prior marker + pending bg -> writes marker, surfaces systemMessage" \
+        "exit 0 + ALLOW + 'background task' + marker written" \
+        "exit $AC22_EXIT; marker=$(test -f "$AC22_MARKER" && echo present || echo missing); output: $AC22_BODY"
+fi
+
+# AC-22b: wrapper without --session-id on a repo that ALREADY has a
+# marker (e.g. set up by a prior hook call). Must exit 0 silently -- no
+# systemMessage, no state mutation. Mirrors the real scenario Codex
+# flagged: rlcr-stop-gate.sh re-run by an unaware caller.
+echo "Test AC-22b: wrapper without session_id, prior marker -> silent ALLOW"
+AC22B_REPO="$TEST_DIR/ac22b"
+AC22B_LOOP=$(create_full_fixture "$AC22B_REPO")
+AC22B_STATE="$AC22B_LOOP/state.md"
+AC22B_MARKER="$AC22B_LOOP/bg-pending.marker"
+AC22B_BRANCH=$(git -C "$AC22B_REPO" rev-parse --abbrev-ref HEAD)
+AC22B_BASE_COMMIT=$(git -C "$AC22B_REPO" rev-parse HEAD)
+cat > "$AC22B_STATE" <<EOF_AC22B
+---
+current_round: 0
+max_iterations: 42
+codex_model: gpt-5.5
+codex_effort: high
+codex_timeout: 60
+push_every_round: false
+full_review_round: 5
+plan_file: "plans/test-plan.md"
+plan_tracked: false
+start_branch: $AC22B_BRANCH
+base_branch: $AC22B_BRANCH
+base_commit: $AC22B_BASE_COMMIT
+review_started: false
+ask_codex_question: false
+agent_teams: false
+session_id: session_alpha
+---
+EOF_AC22B
+AC22B_STATE_HASH_BEFORE=$(sha256sum "$AC22B_STATE" | awk '{print $1}')  # baseline for the state.md comparison below
+: > "$AC22B_MARKER"  # empty marker that already exists before the wrapper runs
+
+AC22B_OUT="$TEST_DIR/ac22b-out.txt"
+set +e  # the gate's exit status is asserted below, so it must not abort the script
+(
+    cd "$AC22B_REPO" || exit 1  # never run the gate from the wrong directory
+    "$GATE_SCRIPT" --project-root "$AC22B_REPO"
+) > "$AC22B_OUT" 2>&1
+AC22B_EXIT=$?
+set -e
+
+AC22B_STATE_HASH_AFTER=$(sha256sum "$AC22B_STATE" | awk '{print $1}')  # empty if hashing failed; the -n guard below rejects that
+if [[ "$AC22B_EXIT" -eq 0 ]] \
+    && grep -q "^ALLOW:" "$AC22B_OUT" \
+    && ! grep -qi "parked" "$AC22B_OUT" \
+    && [[ -f "$AC22B_MARKER" ]] \
+    && [[ -n "$AC22B_STATE_HASH_BEFORE" && "$AC22B_STATE_HASH_BEFORE" == "$AC22B_STATE_HASH_AFTER" ]]; then
+    pass "AC-22b: wrapper without session_id + existing marker -> silent ALLOW; marker and state preserved"
+else
+    AC22B_BODY=$(cat "$AC22B_OUT" 2>/dev/null || true)
+    fail "AC-22b: wrapper without session_id + existing marker -> silent ALLOW; marker and state preserved" \
+        "exit 0 + ALLOW: (no 'parked') + marker kept + state.md byte-identical" \
+        "exit $AC22B_EXIT; marker=$(test -f "$AC22B_MARKER" && echo present || echo missing); state_unchanged=$([[ -n "$AC22B_STATE_HASH_BEFORE" && "$AC22B_STATE_HASH_BEFORE" == "$AC22B_STATE_HASH_AFTER" ]] && echo yes || echo no); output: $AC22B_BODY"
+fi
+
+# ---------------- AC-23 ----------------
+# Liveness probe positive: a pending task whose output file is open by at
+# least one process (lsof exits 0) must still be treated as running.
+# The short-circuit must fire and emit a systemMessage.
+echo "Test AC-23: liveness probe - alive task (lsof has holder) -> still short-circuits"
+AC23_REPO="$TEST_DIR/ac23"
+AC23_LOOP=$(create_full_fixture "$AC23_REPO")
+AC23_STATE="$AC23_LOOP/state.md"
+AC23_TRANSCRIPT="$TRANSCRIPTS_DIR/ac23.jsonl"
+AC23_TASK_ID="agent_probe_alive"
+AC23_LAUNCH=$(emit_tool_use_assistant "toolu_AC23" "Agent" ',"description":"x","prompt":"x"')
+AC23_RESULT=$(emit_async_agent_launch_result "toolu_AC23" "$AC23_TASK_ID")
+write_transcript "$AC23_TRANSCRIPT" "$AC23_LAUNCH" "$AC23_RESULT"
+
+AC23_UID=$(id -u)
+AC23_SLUG=$(basename "$TRANSCRIPTS_DIR")
+AC23_TASKS_DIR="/tmp/claude-${AC23_UID}/${AC23_SLUG}/ac23/tasks"
+mkdir -p "$AC23_TASKS_DIR"
+touch "$AC23_TASKS_DIR/${AC23_TASK_ID}.output"  # the task output file the lsof stub is consulted about
+
+AC23_INPUT=$(jq -c -n --arg tp "$AC23_TRANSCRIPT" '{transcript_path:$tp}')
+run_stop_hook_with_input "$AC23_REPO" "$AC23_INPUT" "" "$TEST_DIR/bin/lsof-alive"
+rm -rf "/tmp/claude-${AC23_UID}/${AC23_SLUG}/ac23" 2>/dev/null || true  # best-effort cleanup of the fake tasks dir
+assert_systemmessage_only \
+    "AC-23: alive task (lsof has holder) still triggers short-circuit" \
+    "$AC23_REPO" "$AC23_STATE" "1 background task"
+
+# ---------------- AC-24 ----------------
+# Liveness probe negative: a pending task whose output file has no open
+# file descriptors (lsof exits 1) was killed without a completion event.
+# The probe must drop it so the hook proceeds to normal Codex review.
+echo "Test AC-24: liveness probe - dead/orphaned task (lsof no holder) -> reaches Codex"
+AC24_REPO="$TEST_DIR/ac24"
+create_full_fixture "$AC24_REPO" > /dev/null
+AC24_TRANSCRIPT="$TRANSCRIPTS_DIR/ac24.jsonl"
+AC24_TASK_ID="agent_probe_dead"
+AC24_LAUNCH=$(emit_tool_use_assistant "toolu_AC24" "Agent" ',"description":"x","prompt":"x"')
+AC24_RESULT=$(emit_async_agent_launch_result "toolu_AC24" "$AC24_TASK_ID")
+write_transcript "$AC24_TRANSCRIPT" "$AC24_LAUNCH" "$AC24_RESULT"
+
+AC24_UID=$(id -u)
+AC24_SLUG=$(basename "$TRANSCRIPTS_DIR")
+AC24_TASKS_DIR="/tmp/claude-${AC24_UID}/${AC24_SLUG}/ac24/tasks"
+mkdir -p "$AC24_TASKS_DIR"
+touch "$AC24_TASKS_DIR/${AC24_TASK_ID}.output"  # the task output file the lsof stub is consulted about
+
+AC24_INPUT=$(jq -c -n --arg tp "$AC24_TRANSCRIPT" '{transcript_path:$tp}')
+run_stop_hook_with_input "$AC24_REPO" "$AC24_INPUT" "" "$TEST_DIR/bin/lsof-dead"
+rm -rf "/tmp/claude-${AC24_UID}/${AC24_SLUG}/ac24" 2>/dev/null || true  # best-effort cleanup of the fake tasks dir
+assert_reached_codex "AC-24: dead/orphaned task (lsof no holder) is pruned; Codex review runs"
+
+print_test_summary "Stop Hook Background-Task Allow Test Summary"
+exit $?
diff --git a/tests/test-stop-hook-legacy-compat.sh b/tests/test-stop-hook-legacy-compat.sh index 94ad7db3..638de91e 100755 --- a/tests/test-stop-hook-legacy-compat.sh +++ b/tests/test-stop-hook-legacy-compat.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for legacy compatibility fixes in loop-codex-stop-hook.sh # @@ -25,7 +25,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash if [[ -n "${MOCK_CODEX_MARKER:-}" ]]; then : > "$MOCK_CODEX_MARKER" fi @@ -68,7 +68,7 @@ EOF --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -163,6 +163,11 @@ echo "Test 1b: Untracked .humanizeconfig still blocks dirty checks" TEST1B_REPO="$TEST_DIR/test1b" create_stop_hook_fixture "$TEST1B_REPO" touch "$TEST1B_REPO/.humanizeconfig" +# Also create a .humanize-old directory to trigger the "Special Case" note. +# The .humanize/ directory itself may be covered by a global gitignore +# so it might not appear as untracked; .humanize-old/ is never globally ignored. +mkdir -p "$TEST1B_REPO/.humanize-old" +echo "legacy" > "$TEST1B_REPO/.humanize-old/legacy.txt" run_stop_hook "$TEST1B_REPO" if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! 
-f "$RUN_MARKER" ]] && \ diff --git a/tests/test-task-tag-routing.sh b/tests/test-task-tag-routing.sh index ae9365f7..f5a8c0e9 100755 --- a/tests/test-task-tag-routing.sh +++ b/tests/test-task-tag-routing.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for task-tag routing in RLCR loop prompts # @@ -27,15 +27,22 @@ create_mock_codex() { local exec_output="${2:-Need follow-up work}" mkdir -p "$bin_dir" cat > "$bin_dir/codex" << MOCK_EOF -#!/bin/bash -if [[ "\$1" == "exec" ]]; then +#!/usr/bin/env bash +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'OUT' $exec_output OUT -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then echo "No issues found." else - echo "mock-codex: unsupported command \$1" >&2 + echo "mock-codex: unsupported command \$*" >&2 exit 1 fi MOCK_EOF @@ -180,6 +187,15 @@ Keep routing behavior stable. | Task | Target AC | Status | Tag | Owner | Notes | |------|-----------|--------|-----|-------|-------| | Keep routing note | AC-1 | in_progress | analyze | codex | - +EOF + cat > "$loop_dir/round-0-contract.md" << 'EOF' +# Round 0 Contract + +- Mainline Objective: Keep routing behavior stable while addressing the current review feedback. +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: Follow-up prompt is generated with routing guidance intact. EOF cat > "$loop_dir/round-0-summary.md" << 'EOF' # Round 0 Summary @@ -197,6 +213,8 @@ setup_test_dir setup_stophook_repo "$TEST_DIR/hook-routing" create_mock_codex "$TEST_DIR/hook-routing/bin" "## Review Feedback +Mainline Progress Verdict: STALLED + Issue remains unresolved. 
CONTINUE" diff --git a/tests/test-template-loader.sh b/tests/test-template-loader.sh index 74bbd784..e9d48639 100755 --- a/tests/test-template-loader.sh +++ b/tests/test-template-loader.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for template-loader.sh # diff --git a/tests/test-template-references.sh b/tests/test-template-references.sh index a302ad19..13d4c31b 100755 --- a/tests/test-template-references.sh +++ b/tests/test-template-references.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Template Reference Validation # diff --git a/tests/test-templates-comprehensive.sh b/tests/test-templates-comprehensive.sh index de68ded7..d27dac44 100755 --- a/tests/test-templates-comprehensive.sh +++ b/tests/test-templates-comprehensive.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Comprehensive template validation tests for CI/CD # diff --git a/tests/test-todo-checker.sh b/tests/test-todo-checker.sh index b3e7b072..13dc9308 100755 --- a/tests/test-todo-checker.sh +++ b/tests/test-todo-checker.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for check-todos-from-transcript.py # @@ -157,6 +157,36 @@ else fail "In-progress status" "exit 1" "exit $EXIT_CODE" fi +# Test 8b: Queued TodoWrite item does NOT block exit +echo "Test 8b: Queued TodoWrite item" +cat > "$TEST_DIR/transcript-queued.jsonl" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "[queued] Cleanup follow-up", "status": "pending"}]}}]}} +EOF +set +e +RESULT=$(echo "{\"transcript_path\": \"$TEST_DIR/transcript-queued.jsonl\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Queued TodoWrite item exits 0" +else + fail "Queued TodoWrite item" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 8c: Lane tags in the middle of TodoWrite content do NOT downgrade blocking tasks +echo "Test 8c: Inline queued tag does not bypass TodoWrite blocker" +cat > "$TEST_DIR/transcript-inline-tag.jsonl" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "Fix docs mentioning [queued] follow-ups", "status": "pending"}]}}]}} +EOF +set +e +RESULT=$(echo "{\"transcript_path\": \"$TEST_DIR/transcript-inline-tag.jsonl\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Inline queued tag still blocks TodoWrite item" +else + fail "Inline queued TodoWrite item" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + # ======================================== # Test Group 3: Transcript Format Variations # ======================================== @@ -357,6 +387,57 @@ else fail "Task with in_progress status" "exit 1" "exit $EXIT_CODE, output: $RESULT" fi +# Test 19b: Queued file-based task does NOT block exit +echo "Test 19b: Queued task does not block" +MOCK_SESSION_19B="session-19b" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19B" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19B/task-1.json" << 'EOF' +{"subject": "[queued] Follow-up cleanup", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19B\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? 
+set -e
+if (( EXIT_CODE == 0 )); then
+    pass "Queued task exits 0"
+else
+    fail "Queued task" "exit 0" "exit $EXIT_CODE, output: $RESULT"
+fi
+
+# Test 19c: a task explicitly tagged [blocking] must keep blocking exit
+echo "Test 19c: Blocking task still blocks"
+MOCK_SESSION_19C="session-19c"
+mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19C"
+cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19C/task-1.json" << 'EOF'
+{"subject": "[blocking] Fix failing test", "status": "pending"}
+EOF
+set +e
+RESULT=$(python3 "$TODO_CHECKER" 2>&1 <<<"{\"session_id\": \"$MOCK_SESSION_19C\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}")
+EXIT_CODE=$?
+set -e
+if (( EXIT_CODE == 1 )) && grep -q '\[blocking\]' <<<"$RESULT"; then
+    pass "Blocking task exits 1 with lane marker"
+else
+    fail "Blocking task" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT"
+fi
+
+# Test 19d: a [queued] tag that only appears inside the task body must NOT downgrade a blocking task
+echo "Test 19d: Inline queued tag in task body does not bypass blocker"
+MOCK_SESSION_19D="session-19d"
+mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19D"
+cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19D/task-1.json" << 'EOF'
+{"subject": "Triage review fallout", "description": "Notes mention [queued] cleanup but this task is still active", "status": "pending"}
+EOF
+set +e
+RESULT=$(python3 "$TODO_CHECKER" 2>&1 <<<"{\"session_id\": \"$MOCK_SESSION_19D\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}")
+EXIT_CODE=$?
+set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Inline queued tag still blocks file-based task" +else + fail "Inline queued file-based task" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 20: Multiple tasks, one incomplete echo "Test 20: Multiple tasks, one incomplete" MOCK_SESSION_20="session-20" diff --git a/tests/test-unified-codex-config.sh b/tests/test-unified-codex-config.sh index a9817709..51e1e9b6 100755 --- a/tests/test-unified-codex-config.sh +++ b/tests/test-unified-codex-config.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for unified codex_model/codex_effort configuration # @@ -65,8 +65,8 @@ DEFAULT_CONFIG="$PROJECT_ROOT/config/default_config.json" if ! command -v jq >/dev/null 2>&1; then skip "default config tests require jq" "jq not found" else - assert_eq "default_config.json: codex_model is gpt-5.4" \ - "gpt-5.4" "$(jq -r '.codex_model' "$DEFAULT_CONFIG")" + assert_eq "default_config.json: codex_model is gpt-5.5" \ + "gpt-5.5" "$(jq -r '.codex_model' "$DEFAULT_CONFIG")" assert_eq "default_config.json: codex_effort is high" \ "high" "$(jq -r '.codex_effort' "$DEFAULT_CONFIG")" @@ -100,8 +100,8 @@ else merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) - assert_eq "default-only: codex_model defaults to gpt-5.4" \ - "gpt-5.4" "$(get_config_value "$merged" "codex_model")" + assert_eq "default-only: codex_model defaults to gpt-5.5" \ + "gpt-5.5" "$(get_config_value "$merged" "codex_model")" assert_eq "default-only: codex_effort defaults to high" \ "high" "$(get_config_value "$merged" "codex_effort")" @@ -141,7 +141,7 @@ else " 2>/dev/null || echo "ERROR") assert_eq "loop-common.sh: DEFAULT_CODEX_MODEL is set" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" + "gpt-5.5" "$(echo "$result" | cut -d'|' -f1)" assert_eq "loop-common.sh: DEFAULT_CODEX_EFFORT is set" \ "high" "$(echo "$result" | cut -d'|' 
-f2)" @@ -208,8 +208,8 @@ else result_line="$(printf '%s\n' "$result" | grep '^RESULT:' | tail -n 1)" - assert_eq "invalid config: codex_model falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" + assert_eq "invalid config: codex_model falls back to gpt-5.5" \ + "gpt-5.5" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" assert_eq "invalid config: codex_effort falls back to high" \ "high" "$(echo "$result_line" | cut -d'|' -f2)" @@ -236,8 +236,8 @@ else result_line="$(printf '%s\n' "$result" | grep '^RESULT:' | tail -n 1)" - assert_eq "non-Codex config ($invalid_model): codex_model falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" + assert_eq "non-Codex config ($invalid_model): codex_model falls back to gpt-5.5" \ + "gpt-5.5" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" assert_eq "non-Codex config ($invalid_model): codex_effort stays at high fallback" \ "high" "$(echo "$result_line" | cut -d'|' -f2)" @@ -324,8 +324,8 @@ BARE_EOF echo \"\$EXEC_MODEL|\$EXEC_EFFORT\" " 2>/dev/null || echo "ERROR") - assert_eq "bare state: falls back to DEFAULT_CODEX_MODEL (gpt-5.4)" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" + assert_eq "bare state: falls back to DEFAULT_CODEX_MODEL (gpt-5.5)" \ + "gpt-5.5" "$(echo "$result" | cut -d'|' -f1)" assert_eq "bare state: falls back to DEFAULT_CODEX_EFFORT (high)" \ "high" "$(echo "$result" | cut -d'|' -f2)" @@ -422,7 +422,7 @@ else --- current_round: 1 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false @@ -460,8 +460,8 @@ STALE_EOF echo \"\$STATE_CODEX_MODEL|\$STATE_CODEX_EFFORT\" " 2>/dev/null || echo "ERROR") - assert_eq "stale state: STATE_CODEX_MODEL still parsed (gpt-5.4)" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" + assert_eq "stale state: STATE_CODEX_MODEL still parsed (gpt-5.5)" \ + "gpt-5.5" "$(echo "$result" | cut -d'|' -f1)" assert_eq 
"stale state: STATE_CODEX_EFFORT still parsed (high)" \ "high" "$(echo "$result" | cut -d'|' -f2)" @@ -491,7 +491,7 @@ else --- current_round: 1 max_iterations: 10 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: superhigh codex_timeout: 3600 push_every_round: false @@ -512,7 +512,7 @@ HOOK_STATE_EOF STUB_BIN="$TEST_DIR/stub-bin" mkdir -p "$STUB_BIN" cat > "$STUB_BIN/codex" << 'STUB_EOF' -#!/bin/bash +#!/usr/bin/env bash echo "CODEX_INVOKED" >> "$CODEX_INVOCATION_LOG" exit 0 STUB_EOF @@ -640,14 +640,14 @@ echo "" echo "--- Input validation ---" # Test invalid model name (has spaces) - test the validation regex directly -model_with_spaces="gpt 5.4 bad" +model_with_spaces="gpt 5.5 bad" if [[ ! "$model_with_spaces" =~ ^[a-zA-Z0-9._-]+$ ]]; then pass "validation: model with spaces is rejected by regex" else fail "validation: model with spaces is rejected by regex" fi -model_with_shell="gpt-5.4;rm-rf" +model_with_shell="gpt-5.5;rm-rf" if [[ ! "$model_with_shell" =~ ^[a-zA-Z0-9._-]+$ ]]; then pass "validation: model with shell metacharacters is rejected" else @@ -673,76 +673,6 @@ done echo "" -# ======================================== -# PR loop respects config-backed codex_model (AC-5) -# ======================================== - -echo "--- PR loop config-backed defaults ---" - -SETUP_PR_LOOP="$PROJECT_ROOT/scripts/setup-pr-loop.sh" -PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" - -if [[ ! -f "$LOOP_COMMON" ]]; then - skip "PR loop config tests require loop-common.sh" "file not found" -elif [[ ! 
-f "$SETUP_PR_LOOP" ]]; then - skip "PR loop config tests require setup-pr-loop.sh" "file not found" -else - # PR loop setup does NOT pre-set DEFAULT_CODEX_MODEL (should come from config) - assert_no_grep "setup-pr-loop.sh: does not pre-set DEFAULT_CODEX_MODEL" \ - 'DEFAULT_CODEX_MODEL=' "$SETUP_PR_LOOP" - - # PR loop setup DOES pre-set DEFAULT_CODEX_EFFORT to medium - assert_grep "setup-pr-loop.sh: pre-sets DEFAULT_CODEX_EFFORT to medium" \ - 'DEFAULT_CODEX_EFFORT="medium"' "$SETUP_PR_LOOP" - - # PR stop hook also does NOT pre-set DEFAULT_CODEX_MODEL - if [[ ! -f "$PR_STOP_HOOK" ]]; then - skip "pr-loop-stop-hook.sh tests require pr-loop-stop-hook.sh" "file not found" - else - assert_no_grep "pr-loop-stop-hook.sh: does not pre-set DEFAULT_CODEX_MODEL" \ - 'DEFAULT_CODEX_MODEL=' "$PR_STOP_HOOK" - - assert_grep "pr-loop-stop-hook.sh: pre-sets DEFAULT_CODEX_EFFORT to medium" \ - 'DEFAULT_CODEX_EFFORT="medium"' "$PR_STOP_HOOK" - fi - - # Behavioral: sourcing loop-common.sh with PR loop effort pre-set picks up config model - setup_test_dir - PR_CFG_PROJECT="$TEST_DIR/pr-cfg-project" - mkdir -p "$PR_CFG_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_CFG_PROJECT/.humanize/config.json" - - result=$(bash -c " - export DEFAULT_CODEX_EFFORT='medium' - export CLAUDE_PROJECT_DIR='$PR_CFG_PROJECT' - export XDG_CONFIG_HOME='$TEST_DIR/no-user-config' - source '$LOOP_COMMON' 2>/dev/null - echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" - " 2>/dev/null || echo "ERROR") - - assert_eq "PR loop behavioral: config codex_model respected (o3-mini)" \ - "o3-mini" "$(echo "$result" | cut -d'|' -f1)" - - assert_eq "PR loop behavioral: pre-set effort kept over config (medium)" \ - "medium" "$(echo "$result" | cut -d'|' -f2)" - - # Without config, falls back to hardcoded default model but keeps medium effort - result=$(bash -c " - export DEFAULT_CODEX_EFFORT='medium' - export XDG_CONFIG_HOME='$TEST_DIR/no-user-config' - source '$LOOP_COMMON' 
2>/dev/null - echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" - " 2>/dev/null || echo "ERROR") - - assert_eq "PR loop behavioral: no config falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" - - assert_eq "PR loop behavioral: no config keeps medium effort" \ - "medium" "$(echo "$result" | cut -d'|' -f2)" -fi - -echo "" - # ======================================== # ask-codex respects config-backed defaults (AC-5) # ======================================== @@ -775,127 +705,6 @@ fi echo "" -# ======================================== -# PR loop --codex-model override (runtime behavioral) -# ======================================== - -echo "--- PR loop --codex-model override (runtime) ---" - -if [[ ! -f "$SETUP_PR_LOOP" ]]; then - skip "PR loop override test requires setup-pr-loop.sh" "file not found" -else - # Run setup-pr-loop.sh --help with project config to verify help text shows config-backed default - # --help exits before requiring gh/PR prerequisites, so no external deps needed - setup_test_dir - PR_OVERRIDE_PROJECT="$TEST_DIR/pr-override-project" - mkdir -p "$PR_OVERRIDE_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_OVERRIDE_PROJECT/.humanize/config.json" - - help_output=$(cd "$PR_OVERRIDE_PROJECT" && \ - CLAUDE_PROJECT_DIR="$PR_OVERRIDE_PROJECT" \ - XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ - timeout 10 bash "$SETUP_PR_LOOP" --help 2>&1) || true - - # Help text must mention config-backed default (not a hardcoded model name) - if echo "$help_output" | grep -q 'default from config'; then - pass "PR loop runtime: --help shows config-backed default" - else - fail "PR loop runtime: --help shows config-backed default" "contains 'default from config'" "$(echo "$help_output" | grep codex-model)" - fi - - # End-to-end: run setup-pr-loop.sh with mock gh/codex and --codex-model override - if ! 
command -v jq >/dev/null 2>&1; then - skip "PR loop e2e test requires jq" "jq not found" - else - setup_test_dir - PR_E2E_PROJECT="$TEST_DIR/pr-e2e-project" - init_test_git_repo "$PR_E2E_PROJECT" - mkdir -p "$PR_E2E_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_E2E_PROJECT/.humanize/config.json" - - # Create a local bare remote (setup-pr-loop.sh needs a git remote) - PR_BARE_REMOTE="$TEST_DIR/pr-remote.git" - git clone --bare "$PR_E2E_PROJECT" "$PR_BARE_REMOTE" -q 2>/dev/null - (cd "$PR_E2E_PROJECT" && git remote remove origin 2>/dev/null; git remote add origin "$PR_BARE_REMOTE") 2>/dev/null || true - - # Create mock gh that handles all setup-pr-loop.sh calls - PR_MOCK_BIN="$TEST_DIR/pr-mock-bin" - mkdir -p "$PR_MOCK_BIN" - cat > "$PR_MOCK_BIN/gh" << 'GH_MOCK_EOF' -#!/bin/bash -# Mock gh for setup-pr-loop.sh end-to-end test -ALL_ARGS="$*" -case "$1" in - auth) exit 0 ;; - repo) - if [[ "$ALL_ARGS" == *"owner,name"* ]]; then - echo "testowner/testrepo"; exit 0 - elif [[ "$ALL_ARGS" == *"parent"* ]]; then - echo "null/"; exit 0 - fi ;; - pr) - if [[ "$2" == "view" ]]; then - if [[ "$ALL_ARGS" == *"number,url"* ]]; then - printf '123\nhttps://github.com/testowner/testrepo/pull/123'; exit 0 - elif [[ "$ALL_ARGS" == *"state"* ]]; then - echo "OPEN"; exit 0 - elif [[ "$ALL_ARGS" == *"number"* ]]; then - echo "123"; exit 0 - elif [[ "$ALL_ARGS" == *"headRefOid"* ]]; then - echo '{"sha":"abc123","date":"2026-01-01T00:00:00Z"}'; exit 0 - fi - elif [[ "$2" == "comment" ]]; then - echo "https://github.com/testowner/testrepo/pull/123#comment-1"; exit 0 - fi ;; - api) - if [[ "$2" == "user" ]]; then - echo '{"login":"testuser"}'; exit 0 - elif [[ "$2" == *"/comments"* ]] || [[ "$2" == *"/reviews"* ]]; then - echo "[]"; exit 0 - fi - echo "[]"; exit 0 ;; -esac -echo "Mock gh: unhandled: $ALL_ARGS" >&2; exit 1 -GH_MOCK_EOF - chmod +x "$PR_MOCK_BIN/gh" - - # Create mock codex (not called during setup, but required by command -v check) - cat 
> "$PR_MOCK_BIN/codex" << 'CODEX_MOCK_EOF' -#!/bin/bash -exit 0 -CODEX_MOCK_EOF - chmod +x "$PR_MOCK_BIN/codex" - - # Run setup-pr-loop.sh with --codex-model override - pr_setup_exit=0 - pr_output=$(cd "$PR_E2E_PROJECT" && \ - CLAUDE_PROJECT_DIR="$PR_E2E_PROJECT" \ - XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ - PATH="$PR_MOCK_BIN:$PATH" \ - timeout 30 bash "$SETUP_PR_LOOP" --claude --codex-model override-model:xhigh 2>&1) || pr_setup_exit=$? - - assert_eq "PR loop e2e: setup-pr-loop.sh exited successfully" \ - "0" "$pr_setup_exit" - - # Find the generated PR loop state.md - PR_STATE_FILE=$(find "$PR_E2E_PROJECT/.humanize/pr-loop" -name "state.md" 2>/dev/null | head -1 || true) - if [[ -z "$PR_STATE_FILE" ]]; then - fail "PR loop e2e: state.md was created" "non-empty path" "empty" - else - pass "PR loop e2e: state.md was created" - - # Assert --codex-model override is stored in state, not config values - assert_eq "PR loop e2e: --codex-model set codex_model (override-model)" \ - "override-model" "$(grep '^codex_model:' "$PR_STATE_FILE" | sed 's/codex_model: *//')" - - assert_eq "PR loop e2e: --codex-model set codex_effort (xhigh)" \ - "xhigh" "$(grep '^codex_effort:' "$PR_STATE_FILE" | sed 's/codex_effort: *//')" - fi - fi -fi - -echo "" - # ======================================== # ask-codex runtime behavioral test # ======================================== @@ -915,7 +724,7 @@ else MOCK_BIN="$TEST_DIR/mock-bin" mkdir -p "$MOCK_BIN" cat > "$MOCK_BIN/codex" << 'MOCK_EOF' -#!/bin/bash +#!/usr/bin/env bash echo "mock codex response" exit 0 MOCK_EOF