From 29a528fbd6b8ff97be5c60443047d5d2d193f89f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:16:52 -0700 Subject: [PATCH 01/97] Add methodology analysis phase for RLCR loop exit Add a pre-exit analysis phase that spawns an Opus agent to review development records from a methodology perspective. The agent produces a sanitized report (no project-specific info) and optionally helps the user file a GitHub issue with improvement suggestions. New --privacy flag disables the feature; legacy loops whose state file lacks the privacy_mode field default to privacy=true, so the analysis only runs for loops created after this change (enabled by default, opt out with --privacy). The phase integrates into all three non-manual exit paths (complete, stop, maxiter) using the established Finalize Phase pattern with state file renaming and a completion artifact gate. --- commands/cancel-rlcr-loop.md | 3 +- commands/start-rlcr-loop.md | 2 +- hooks/lib/loop-common.sh | 37 +++- hooks/lib/methodology-analysis.sh | 186 ++++++++++++++++++ hooks/loop-bash-validator.sh | 30 ++- hooks/loop-codex-stop-hook.sh | 45 ++++- hooks/loop-edit-validator.sh | 9 +- hooks/loop-write-validator.sh | 13 +- .../claude/methodology-analysis-prompt.md | 73 +++++++ scripts/cancel-rlcr-loop.sh | 13 +- scripts/lib/monitor-common.sh | 6 +- scripts/setup-rlcr-loop.sh | 7 + skills/humanize/SKILL.md | 4 + 13 files changed, 410 insertions(+), 18 deletions(-) create mode 100644 hooks/lib/methodology-analysis.sh create mode 100644 prompt-template/claude/methodology-analysis-prompt.md diff --git a/commands/cancel-rlcr-loop.md b/commands/cancel-rlcr-loop.md index f6891406..eceb580e 100644 --- a/commands/cancel-rlcr-loop.md +++ b/commands/cancel-rlcr-loop.md @@ -17,6 +17,7 @@ To cancel the active loop: 2. Check the first line of output: - **NO_LOOP** or **NO_ACTIVE_LOOP**: Say "No active RLCR loop found." 
- **CANCELLED**: Report the cancellation message from the output + - **CANCELLED_METHODOLOGY_ANALYSIS**: Report the cancellation message from the output - **CANCELLED_FINALIZE**: Report the cancellation message from the output - **FINALIZE_NEEDS_CONFIRM**: The loop is in Finalize Phase. Continue to step 3 @@ -33,6 +34,6 @@ To cancel the active loop: - **If user chooses "No, let it finish"**: - Report: "Understood. The Finalize Phase will continue. Once complete, the loop will end normally." -**Key principle**: The script handles all cancellation logic. A loop is active if `state.md` (normal loop) or `finalize-state.md` (Finalize Phase) exists in the newest loop directory. +**Key principle**: The script handles all cancellation logic. A loop is active if `state.md` (normal loop), `methodology-analysis-state.md` (Methodology Analysis Phase), or `finalize-state.md` (Finalize Phase) exists in the newest loop directory. The loop directory with summaries, review results, and state information will be preserved for reference. 
diff --git a/commands/start-rlcr-loop.md b/commands/start-rlcr-loop.md index 8d08ce6a..a73ea27f 100644 --- a/commands/start-rlcr-loop.md +++ b/commands/start-rlcr-loop.md @@ -1,6 +1,6 @@ --- description: "Start iterative loop with Codex review" -argument-hint: "[path/to/plan.md | --plan-file path/to/plan.md] [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] [--track-plan-file] [--push-every-round] [--base-branch BRANCH] [--full-review-round N] [--skip-impl] [--claude-answer-codex] [--agent-teams]" +argument-hint: "[path/to/plan.md | --plan-file path/to/plan.md] [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] [--track-plan-file] [--push-every-round] [--base-branch BRANCH] [--full-review-round N] [--skip-impl] [--claude-answer-codex] [--agent-teams] [--privacy]" allowed-tools: - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-rlcr-loop.sh:*)" - "Read" diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 5151018f..ea8f62c7 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -38,6 +38,7 @@ readonly FIELD_FULL_REVIEW_ROUND="full_review_round" readonly FIELD_ASK_CODEX_QUESTION="ask_codex_question" readonly FIELD_SESSION_ID="session_id" readonly FIELD_AGENT_TEAMS="agent_teams" +readonly FIELD_PRIVACY_MODE="privacy_mode" # Default Codex configuration (single source of truth - all scripts reference this) # Scripts can pre-set DEFAULT_CODEX_MODEL/DEFAULT_CODEX_EFFORT before sourcing to override. 
@@ -216,7 +217,9 @@ extract_session_id() { resolve_active_state_file() { local loop_dir="$1" - if [[ -f "$loop_dir/finalize-state.md" ]]; then + if [[ -f "$loop_dir/methodology-analysis-state.md" ]]; then + echo "$loop_dir/methodology-analysis-state.md" + elif [[ -f "$loop_dir/finalize-state.md" ]]; then echo "$loop_dir/finalize-state.md" elif [[ -f "$loop_dir/state.md" ]]; then echo "$loop_dir/state.md" @@ -234,7 +237,10 @@ resolve_any_state_file() { local loop_dir="$1" # Prefer active states - if [[ -f "$loop_dir/finalize-state.md" ]]; then + if [[ -f "$loop_dir/methodology-analysis-state.md" ]]; then + echo "$loop_dir/methodology-analysis-state.md" + return + elif [[ -f "$loop_dir/finalize-state.md" ]]; then echo "$loop_dir/finalize-state.md" return elif [[ -f "$loop_dir/state.md" ]]; then @@ -364,6 +370,7 @@ _parse_state_fields() { STATE_ASK_CODEX_QUESTION=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_ASK_CODEX_QUESTION}:" | sed "s/${FIELD_ASK_CODEX_QUESTION}: *//" | tr -d ' ' || true) STATE_SESSION_ID=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_SESSION_ID}:" | sed "s/${FIELD_SESSION_ID}: *//" || true) STATE_AGENT_TEAMS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_AGENT_TEAMS}:" | sed "s/${FIELD_AGENT_TEAMS}: *//" | tr -d ' ' || true) + STATE_PRIVACY_MODE=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_PRIVACY_MODE}:" | sed "s/${FIELD_PRIVACY_MODE}: *//" | tr -d ' ' || true) } # Parse state file frontmatter and set variables (tolerant mode with defaults) @@ -406,6 +413,8 @@ parse_state_file() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + # Default privacy_mode to "true" for legacy loops that pre-date this field + STATE_PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" # STATE_REVIEW_STARTED left as-is (empty if missing, to allow schema validation) return 0 @@ -683,6 +692,21 @@ is_finalize_state_file_path() { echo "$path_lower" | grep -qE 
'finalize-state\.md$' } +# Check if a path (lowercase) targets methodology-analysis-state.md +is_methodology_analysis_state_file_path() { + local path_lower="$1" + echo "$path_lower" | grep -qE 'methodology-analysis-state\.md$' +} + +# Standard message for blocking methodology-analysis-state file modifications +methodology_analysis_state_file_blocked_message() { + local fallback="# Methodology Analysis State File Modification Blocked + +You cannot modify methodology-analysis-state.md. This file is managed by the loop system during the Methodology Analysis Phase." + + load_and_render_safe "$TEMPLATE_DIR" "block/methodology-analysis-state-file-modification.md" "$fallback" +} + # Check if a path (lowercase) targets finalize-summary.md is_finalize_summary_path() { local path_lower="$1" @@ -847,7 +871,8 @@ is_cancel_authorized() { src=$(_normalize_path "$src") local expected_src_state="${loop_dir_lower}state.md" local expected_src_finalize="${loop_dir_lower}finalize-state.md" - if [[ "$src" != "$expected_src_state" ]] && [[ "$src" != "$expected_src_finalize" ]]; then + local expected_src_methodology="${loop_dir_lower}methodology-analysis-state.md" + if [[ "$src" != "$expected_src_state" ]] && [[ "$src" != "$expected_src_finalize" ]] && [[ "$src" != "$expected_src_methodology" ]]; then return 5 fi @@ -860,9 +885,11 @@ is_cancel_authorized() { # SECURITY: Reject if source file is a symlink (filesystem check) # Determine source file by comparing against expected paths (not substring match) - # This avoids vulnerability when loop directory path contains "finalize" + # This avoids vulnerability when loop directory path contains "finalize" or "methodology" local src_original - if [[ "$src" == "$expected_src_finalize" ]]; then + if [[ "$src" == "$expected_src_methodology" ]]; then + src_original="${active_loop_dir}/methodology-analysis-state.md" + elif [[ "$src" == "$expected_src_finalize" ]]; then src_original="${active_loop_dir}/finalize-state.md" else 
src_original="${active_loop_dir}/state.md" diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh new file mode 100644 index 00000000..0b61d5ae --- /dev/null +++ b/hooks/lib/methodology-analysis.sh @@ -0,0 +1,186 @@ +#!/bin/bash +# +# Methodology Analysis Phase library +# +# Provides functions for the methodology improvement analysis phase that runs +# before the RLCR loop truly exits. An independent Opus agent analyzes the +# development records from a pure methodology perspective and optionally helps +# the user file a GitHub issue with improvement suggestions. +# +# This library is sourced by loop-codex-stop-hook.sh. +# + +# Source guard: prevent double-sourcing +[[ -n "${_METHODOLOGY_ANALYSIS_LOADED:-}" ]] && return 0 2>/dev/null || true +_METHODOLOGY_ANALYSIS_LOADED=1 + +# Enter the methodology analysis phase +# +# Renames the current state file to methodology-analysis-state.md, records the +# exit reason, renders the analysis prompt, and outputs a block JSON response. 
+# +# Arguments: +# $1 - exit_reason: "complete", "stop", or "maxiter" +# $2 - exit_reason_description: human-readable explanation of why the loop is exiting +# +# Globals read: +# PRIVACY_MODE - "true" to skip analysis, "false" to proceed +# STATE_FILE - path to the current active state file +# LOOP_DIR - path to the loop directory +# CURRENT_ROUND - current round number +# MAX_ITERATIONS - max iterations setting +# TEMPLATE_DIR - template directory for prompt rendering +# +# Returns: +# 0 - analysis phase entered, block JSON has been output, caller should exit 0 +# 1 - analysis should be skipped (privacy on, already done, or re-entry) +# +enter_methodology_analysis_phase() { + local exit_reason="$1" + local exit_reason_description="$2" + + # Skip if privacy mode is on + if [[ "$PRIVACY_MODE" == "true" ]]; then + echo "Methodology analysis skipped (privacy mode enabled)" >&2 + return 1 + fi + + # Prevent re-entry: if methodology-analysis-state.md already exists, skip + if [[ -f "$LOOP_DIR/methodology-analysis-state.md" ]]; then + echo "Methodology analysis phase already active, skipping re-entry" >&2 + return 1 + fi + + # Skip if already completed in a previous attempt + if [[ -f "$LOOP_DIR/methodology-analysis-done.md" ]]; then + local done_content + done_content=$(cat "$LOOP_DIR/methodology-analysis-done.md" 2>/dev/null || echo "") + if [[ -n "$done_content" ]]; then + echo "Methodology analysis already completed, skipping" >&2 + return 1 + fi + fi + + # Rename current state file to methodology-analysis-state.md + mv "$STATE_FILE" "$LOOP_DIR/methodology-analysis-state.md" + echo "State file renamed to: $LOOP_DIR/methodology-analysis-state.md" >&2 + + # Record the original exit reason so the completion handler can finalize + echo "$exit_reason" > "$LOOP_DIR/.methodology-exit-reason" + + # Create empty placeholder for the completion artifact + touch "$LOOP_DIR/methodology-analysis-done.md" + + # Render prompt template + local fallback="# Methodology Analysis Phase 
+ +Please analyze the development records in $LOOP_DIR and provide methodology improvement suggestions. +Write your analysis to $LOOP_DIR/methodology-analysis-report.md. +When done, write a completion note to $LOOP_DIR/methodology-analysis-done.md." + + local analysis_prompt + analysis_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/methodology-analysis-prompt.md" "$fallback" \ + "LOOP_DIR=$LOOP_DIR" \ + "EXIT_REASON=$exit_reason" \ + "EXIT_REASON_DESCRIPTION=$exit_reason_description" \ + "CURRENT_ROUND=$CURRENT_ROUND" \ + "MAX_ITERATIONS=$MAX_ITERATIONS") + + # Output block JSON with the rendered prompt + jq -n \ + --arg reason "$analysis_prompt" \ + --arg msg "Loop: Methodology Analysis Phase - analyzing development methodology" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + + return 0 +} + +# Complete the methodology analysis phase +# +# Checks the completion artifact, reads the original exit reason, renames the +# state file to the appropriate terminal state, and cleans up marker files. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +# Returns: +# 0 - completion successful, caller should exit 0 (allow exit) +# 1 - completion artifact missing or empty, caller should block +# +complete_methodology_analysis() { + local done_file="$LOOP_DIR/methodology-analysis-done.md" + + # Check completion artifact has actual content (not just empty placeholder) + if [[ ! 
-f "$done_file" ]]; then + return 1 + fi + + local done_content + done_content=$(cat "$done_file" 2>/dev/null || echo "") + if [[ -z "$done_content" ]]; then + return 1 + fi + + # Read exit reason + local exit_reason="complete" + if [[ -f "$LOOP_DIR/.methodology-exit-reason" ]]; then + exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "complete") + exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + fi + + # Validate exit reason + case "$exit_reason" in + complete|stop|maxiter) + ;; + *) + echo "Warning: Invalid methodology exit reason '$exit_reason', defaulting to complete" >&2 + exit_reason="complete" + ;; + esac + + # Rename methodology-analysis-state.md to the terminal state + local target_name="${exit_reason}-state.md" + mv "$LOOP_DIR/methodology-analysis-state.md" "$LOOP_DIR/$target_name" + echo "Methodology analysis complete. State preserved as: $LOOP_DIR/$target_name" >&2 + + # Clean up marker file + rm -f "$LOOP_DIR/.methodology-exit-reason" + + return 0 +} + +# Block exit because methodology analysis is incomplete +# +# Outputs a block JSON instructing Claude to complete the analysis before exiting. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +block_methodology_analysis_incomplete() { + local done_file="$LOOP_DIR/methodology-analysis-done.md" + + local reason="# Methodology Analysis Incomplete + +Please complete the methodology analysis before exiting. + +You need to: +1. Spawn an Opus agent to analyze the development records +2. Review the analysis report +3. Optionally help the user file a GitHub issue +4. Write a completion note to: $done_file + +The completion marker file must contain actual content (not be empty) to signal that the analysis is done." 
+ + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Methodology Analysis Phase - please complete the analysis" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' +} diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 948612e1..211e1478 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -148,6 +148,15 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then # 1. command_modifies_file checks if DESTINATION contains state.md # 2. Additional check below catches if SOURCE contains state.md (e.g., mv state.md /tmp/foo) +if command_modifies_file "$COMMAND_LOWER" "methodology-analysis-state\.md"; then + # Check for cancel signal file - allow authorized cancel operation + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi + if command_modifies_file "$COMMAND_LOWER" "finalize-state\.md"; then # Check for cancel signal file - allow authorized cancel operation if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then @@ -182,6 +191,7 @@ fi # This catches chained commands like: true; mv state.md /tmp/foo MV_CP_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']state\.md" MV_CP_FINALIZE_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']finalize-state\.md" +MV_CP_METHODOLOGY_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']methodology-analysis-state\.md" # Replace shell operators with newlines, then check each segment # Order matters: |& 
before |, && before single & @@ -295,7 +305,17 @@ while IFS= read -r SEGMENT; do t again ') - # Check for finalize-state.md as SOURCE first (more specific pattern) + # Check for methodology-analysis-state.md as SOURCE first (most specific pattern) + if echo "$SEGMENT_CLEANED" | grep -qE "$MV_CP_METHODOLOGY_SOURCE_PATTERN"; then + # Check for cancel signal file - allow authorized cancel operation + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 + fi + + # Check for finalize-state.md as SOURCE (more specific than state.md) if echo "$SEGMENT_CLEANED" | grep -qE "$MV_CP_FINALIZE_SOURCE_PATTERN"; then # Check for cancel signal file - allow authorized cancel operation if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then @@ -319,6 +339,14 @@ done <<< "$COMMAND_SEGMENTS" # This catches bypass attempts like: sh -c 'mv state.md /tmp/foo' # Pattern: look for sh/bash with -c flag and state.md or finalize-state.md in the payload if echo "$COMMAND_LOWER" | grep -qE "(^|[[:space:]/])(sh|bash)[[:space:]]+-c[[:space:]]"; then + # Shell wrapper detected - check if payload contains mv/cp methodology-analysis-state.md (most specific) + if echo "$COMMAND_LOWER" | grep -qE "(mv|cp)[[:space:]].*methodology-analysis-state\.md"; then + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 + fi # Shell wrapper detected - check if payload contains mv/cp finalize-state.md (check first, more specific) if echo "$COMMAND_LOWER" | grep -qE "(mv|cp)[[:space:]].*finalize-state\.md"; then # Check for cancel signal file - allow authorized cancel operation diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 25142818..9339713d 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -50,6 +50,9 @@ source "$SCRIPT_DIR/lib/loop-common.sh" PLUGIN_ROOT="$(cd 
"$SCRIPT_DIR/.." && pwd)" source "$PLUGIN_ROOT/scripts/portable-timeout.sh" +# Source methodology analysis library +source "$SCRIPT_DIR/lib/methodology-analysis.sh" + # Default timeout for git operations (30 seconds) GIT_TIMEOUT=30 @@ -80,6 +83,9 @@ fi IS_FINALIZE_PHASE=false [[ "$STATE_FILE" == *"/finalize-state.md" ]] && IS_FINALIZE_PHASE=true +IS_METHODOLOGY_ANALYSIS_PHASE=false +[[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]] && IS_METHODOLOGY_ANALYSIS_PHASE=true + # ======================================== # Parse State File (using shared function) # ======================================== @@ -120,6 +126,7 @@ CODEX_REVIEW_EFFORT="high" CODEX_TIMEOUT="${STATE_CODEX_TIMEOUT:-${CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}}" ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-false}" AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" +PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" BITLESSON_REQUIRED="false" if [[ -n "$RAW_BITLESSON_REQUIRED" ]]; then BITLESSON_REQUIRED=$(echo "$RAW_BITLESSON_REQUIRED" | sed 's/^bitlesson_required:[[:space:]]*//' | tr -d ' "') @@ -675,6 +682,25 @@ Please push before exiting." fi fi +# ======================================== +# Methodology Analysis Phase Completion Handler +# ======================================== +# When in methodology analysis phase, check if the analysis is done. +# If done, rename state to the original exit reason's terminal state. +# If not done, block and ask Claude to complete the analysis. +# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. 
+ +if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then + if complete_methodology_analysis; then + # Analysis complete, allow exit + exit 0 + else + # Analysis not yet complete, block + block_methodology_analysis_incomplete + exit 0 + fi +fi + # ======================================== # Check Summary File Exists # ======================================== @@ -823,6 +849,10 @@ NEXT_ROUND=$((CURRENT_ROUND + 1)) # - Review Phase: must continue until [P?] issues are cleared, regardless of iteration count if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ "$REVIEW_STARTED" != "true" ]] && [[ $NEXT_ROUND -gt $MAX_ITERATIONS ]]; then echo "RLCR loop did not complete, but reached max iterations ($MAX_ITERATIONS). Exiting." >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "maxiter" "Reached max iterations ($MAX_ITERATIONS) without completion"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_MAXITER" exit 0 fi @@ -834,8 +864,12 @@ fi # No Codex review is performed - this is the final step after Codex already confirmed COMPLETE if [[ "$IS_FINALIZE_PHASE" == "true" ]]; then - echo "Finalize Phase complete. All checks passed. Loop finished!" >&2 - # Rename finalize-state.md to complete-state.md + echo "Finalize Phase complete. All checks passed." >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "complete" "All acceptance criteria met and code review passed"; then + exit 0 + fi + # Methodology analysis skipped or already done - proceed with normal exit mv "$STATE_FILE" "$LOOP_DIR/complete-state.md" echo "State preserved as: $LOOP_DIR/complete-state.md" >&2 exit 0 @@ -1547,6 +1581,9 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then # Max iterations check if [[ $CURRENT_ROUND -ge $MAX_ITERATIONS ]]; then echo "Codex review passed but at max iterations ($MAX_ITERATIONS). Terminating as MAXITER." 
>&2 + if enter_methodology_analysis_phase "maxiter" "Codex confirmed COMPLETE but at max iterations ($MAX_ITERATIONS)"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_MAXITER" exit 0 fi @@ -1640,6 +1677,10 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_STOP" ]]; then echo " $REVIEW_RESULT_FILE" >&2 fi echo "========================================" >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "stop" "Circuit breaker triggered - stagnation detected at round $CURRENT_ROUND"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP" exit 0 fi diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..851ad5c0 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -110,9 +110,14 @@ fi CURRENT_ROUND="$STATE_CURRENT_ROUND" # ======================================== -# Block State File Edits (state.md and finalize-state.md) +# Block State File Edits (state.md, finalize-state.md, methodology-analysis-state.md) # ======================================== -# NOTE: Check finalize-state.md FIRST because is_state_file_path also matches finalize-state.md +# NOTE: Check most specific patterns first because is_state_file_path matches any *state.md + +if is_methodology_analysis_state_file_path "$FILE_PATH_LOWER"; then + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi if is_finalize_state_file_path "$FILE_PATH_LOWER"; then finalize_state_file_blocked_message >&2 diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..633caf73 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -109,12 +109,12 @@ if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ] exit 0 fi -# For state.md, finalize-state.md, goal-tracker.md, and plan.md in .humanize/rlcr, we need further validation +# For state.md, finalize-state.md, methodology-analysis-state.md, 
goal-tracker.md, and plan.md in .humanize/rlcr, we need further validation # For other files in .humanize/rlcr that aren't summaries, allow them FILENAME=$(basename "$FILE_PATH") IS_PLAN_BACKUP=$([[ "$FILENAME" == "plan.md" ]] && echo "true" || echo "false") if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then - if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then + if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_methodology_analysis_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then exit 0 fi fi @@ -147,9 +147,14 @@ fi CURRENT_ROUND="$STATE_CURRENT_ROUND" # ======================================== -# Block State File Writes (state.md and finalize-state.md) +# Block State File Writes (state.md, finalize-state.md, methodology-analysis-state.md) # ======================================== -# NOTE: Check finalize-state.md FIRST because is_state_file_path also matches finalize-state.md +# NOTE: Check most specific patterns first because is_state_file_path matches any *state.md + +if is_methodology_analysis_state_file_path "$FILE_PATH_LOWER"; then + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi if is_finalize_state_file_path "$FILE_PATH_LOWER"; then finalize_state_file_blocked_message >&2 diff --git a/prompt-template/claude/methodology-analysis-prompt.md b/prompt-template/claude/methodology-analysis-prompt.md new file mode 100644 index 00000000..f8a7ec4b --- /dev/null +++ b/prompt-template/claude/methodology-analysis-prompt.md @@ -0,0 +1,73 @@ +# Methodology Analysis Phase + +The RLCR loop has reached its exit point. 
+ +**Exit reason**: {{EXIT_REASON}} - {{EXIT_REASON_DESCRIPTION}} +**Rounds completed**: {{CURRENT_ROUND}} of {{MAX_ITERATIONS}} + +Before the loop fully exits, please perform a methodology improvement analysis. This analysis helps improve the Humanize development methodology itself -- it is NOT about the project you just worked on. + +## Instructions + +### 1. Spawn an Opus Agent for Sanitized Analysis + +Use the Agent tool with `model: "opus"` to spawn an analysis agent. Give it this task: + +**Agent prompt**: Read the development records in `{{LOOP_DIR}}`: +- All files matching `round-*-summary.md` +- All files matching `round-*-review-result.md` + +Analyze these records from a **pure methodology perspective** and write your findings to `{{LOOP_DIR}}/methodology-analysis-report.md`. + +**CRITICAL SANITIZATION RULES** - The report MUST NOT contain: +- File paths, directory paths, or module paths +- Function names, variable names, class names, or method names +- Branch names, commit hashes, or git identifiers +- Business domain terms, product names, or feature names +- Code snippets or code fragments of any kind +- Raw error messages or stack traces +- Project-specific URLs or endpoints +- Any information that could identify the specific project + +**Focus areas for analysis**: +- Iteration efficiency: Were rounds productive or did they repeat similar work? +- Feedback loop quality: Did reviewer feedback lead to meaningful improvements? +- Stagnation patterns: Were there signs of going in circles? +- Review effectiveness: Did reviews catch real issues or create false positives? +- Plan-to-execution alignment: Did execution follow the plan or drift? +- Round count vs. progress ratio: Was the number of rounds proportional to progress? +- Communication clarity: Were summaries and reviews clear and actionable? + +**Output format**: Write a structured report with methodology improvement suggestions. 
Each suggestion should describe a general pattern observed and a concrete improvement to the RLCR methodology. If no improvements are found, write a brief note saying the methodology worked well for this session. + +### 2. Read the Analysis Report + +After the agent completes, read `{{LOOP_DIR}}/methodology-analysis-report.md`. ALL subsequent user-facing content MUST be derived solely from this report -- do NOT reference raw development records directly. + +### 3. Handle Results + +**If no improvements found**: Briefly inform the user that the methodology analysis found no significant improvement suggestions. Then write a completion note to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit. + +**If improvements found**: + +a) Report to the user: + - Brief summary of the exit reason ({{EXIT_REASON}}: {{EXIT_REASON_DESCRIPTION}}) + - Methodology improvement suggestions from the report + +b) Use `AskUserQuestion` to ask if the user would like to help improve Humanize by opening a GitHub issue with these suggestions. Emphasize: + - This is completely voluntary + - The content is fully sanitized (no project-specific information) + - It helps improve the methodology for everyone + +c) **If user declines**: Thank them, write completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md`, and exit. + +d) **If user agrees**: + - Draft a GitHub issue title and body from the analysis report + - Show the draft via a second `AskUserQuestion` for the user to review and confirm + - If confirmed: run `gh issue create --repo humania-org/humanize --title "..." --body "..."` + - If `gh` is not available, provide the title and body so the user can create the issue manually + - Write completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit + +### 4. Completion Marker + +You MUST write meaningful content to `{{LOOP_DIR}}/methodology-analysis-done.md` before exiting. This file signals that the analysis phase is complete. 
A brief summary of what was done (e.g., "Analysis complete, no suggestions" or "Analysis complete, issue filed") is sufficient. diff --git a/scripts/cancel-rlcr-loop.sh b/scripts/cancel-rlcr-loop.sh index 907b051e..8f58b873 100755 --- a/scripts/cancel-rlcr-loop.sh +++ b/scripts/cancel-rlcr-loop.sh @@ -50,7 +50,7 @@ DESCRIPTION: Cancels the active RLCR loop by: 1. Finding the most recent loop directory 2. Creating a .cancel-requested signal file - 3. Renaming state.md or finalize-state.md to cancel-state.md + 3. Renaming state.md, methodology-analysis-state.md, or finalize-state.md to cancel-state.md HELP_EOF exit 0 ;; @@ -98,11 +98,15 @@ fi STATE_FILE="$LOOP_DIR/state.md" FINALIZE_STATE_FILE="$LOOP_DIR/finalize-state.md" +METHODOLOGY_ANALYSIS_STATE_FILE="$LOOP_DIR/methodology-analysis-state.md" CANCEL_SIGNAL="$LOOP_DIR/.cancel-requested" if [[ -f "$STATE_FILE" ]]; then LOOP_STATE="NORMAL_LOOP" ACTIVE_STATE_FILE="$STATE_FILE" +elif [[ -f "$METHODOLOGY_ANALYSIS_STATE_FILE" ]]; then + LOOP_STATE="METHODOLOGY_ANALYSIS_PHASE" + ACTIVE_STATE_FILE="$METHODOLOGY_ANALYSIS_STATE_FILE" elif [[ -f "$FINALIZE_STATE_FILE" ]]; then LOOP_STATE="FINALIZE_PHASE" ACTIVE_STATE_FILE="$FINALIZE_STATE_FILE" @@ -151,6 +155,9 @@ touch "$CANCEL_SIGNAL" # Clean up any pending session_id signal file (setup may not have completed) rm -f "$PROJECT_ROOT/.humanize/.pending-session-id" +# Clean up methodology analysis marker files if present +rm -f "$LOOP_DIR/.methodology-exit-reason" + # Rename state file to cancel-state.md mv "$ACTIVE_STATE_FILE" "$LOOP_DIR/cancel-state.md" @@ -162,6 +169,10 @@ if [[ "$LOOP_STATE" == "NORMAL_LOOP" ]]; then echo "CANCELLED" echo "Cancelled RLCR loop (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." echo "State preserved as cancel-state.md" +elif [[ "$LOOP_STATE" == "METHODOLOGY_ANALYSIS_PHASE" ]]; then + echo "CANCELLED_METHODOLOGY_ANALYSIS" + echo "Cancelled RLCR loop during Methodology Analysis Phase (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." 
+ echo "State preserved as cancel-state.md" else echo "CANCELLED_FINALIZE" echo "Cancelled RLCR loop during Finalize Phase (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..405b25fd 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -159,7 +159,11 @@ monitor_find_state_file() { return fi - # Priority 1: state.md indicates active loop + # Priority 1: Active state files indicate running loop + if [[ -f "$session_dir/methodology-analysis-state.md" ]]; then + echo "$session_dir/methodology-analysis-state.md|methodology-analysis" + return + fi if [[ -f "$session_dir/state.md" ]]; then echo "$session_dir/state.md|active" return diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 1b449712..59300bb4 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -51,6 +51,7 @@ SKIP_IMPL_NO_PLAN="false" ASK_CODEX_QUESTION="true" AGENT_TEAMS="false" BITLESSON_ALLOW_EMPTY_NONE="true" +PRIVACY_MODE="false" show_help() { cat <&2 echo "Use --help for usage information" >&2 @@ -840,6 +846,7 @@ review_started: $INITIAL_REVIEW_STARTED ask_codex_question: $ASK_CODEX_QUESTION session_id: agent_teams: $AGENT_TEAMS +privacy_mode: $PRIVACY_MODE bitlesson_required: $BITLESSON_STATE_VALUE bitlesson_file: $BITLESSON_FILE_REL bitlesson_allow_empty_none: $BITLESSON_ALLOW_EMPTY_NONE diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index c3219d78..f5800f2d 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -96,6 +96,7 @@ Transforms a rough draft document into a structured implementation plan with: - `--push-every-round` - Require git push after each round - `--claude-answer-codex` - Let Claude answer Codex Open Questions directly (default is AskUserQuestion) - `--agent-teams` - Enable Agent Teams mode +- `--privacy` - Disable methodology analysis at loop exit (default: analysis enabled) ### Cancel RLCR 
Loop @@ -226,6 +227,9 @@ Humanize stores all data in `.humanize/`: │ ├── round-N-review-result.md │ ├── finalize-state.md │ ├── finalize-summary.md +│ ├── methodology-analysis-state.md +│ ├── methodology-analysis-report.md +│ ├── methodology-analysis-done.md │ └── complete-state.md ├── pr-loop/ # PR loop data │ └── / From d3ded24b793e3b48988cb245d279ce44aab25e38 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:21:52 -0700 Subject: [PATCH 02/97] Fix methodology analysis completion: require report and fail closed Address Codex review findings: - Require methodology-analysis-report.md to exist before allowing completion (prevents silent no-op when Opus agent does not run) - Fail closed when .methodology-exit-reason marker is missing or invalid instead of defaulting to "complete" (prevents misreporting stop/maxiter loops as successful) --- hooks/lib/methodology-analysis.sh | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index 0b61d5ae..adbde197 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -109,10 +109,11 @@ When done, write a completion note to $LOOP_DIR/methodology-analysis-done.md." # # Returns: # 0 - completion successful, caller should exit 0 (allow exit) -# 1 - completion artifact missing or empty, caller should block +# 1 - incomplete (done marker missing/empty, report missing, or exit reason invalid) # complete_methodology_analysis() { local done_file="$LOOP_DIR/methodology-analysis-done.md" + local report_file="$LOOP_DIR/methodology-analysis-report.md" # Check completion artifact has actual content (not just empty placeholder) if [[ ! 
-f "$done_file" ]]; then @@ -125,20 +126,29 @@ complete_methodology_analysis() { return 1 fi - # Read exit reason - local exit_reason="complete" - if [[ -f "$LOOP_DIR/.methodology-exit-reason" ]]; then - exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "complete") - exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + # Require the analysis report to exist (ensures the Opus agent actually ran) + if [[ ! -f "$report_file" ]]; then + echo "Warning: methodology-analysis-report.md missing, blocking completion" >&2 + return 1 + fi + + # Read exit reason (fail closed: missing marker blocks completion) + if [[ ! -f "$LOOP_DIR/.methodology-exit-reason" ]]; then + echo "Error: .methodology-exit-reason marker missing, cannot determine terminal state" >&2 + return 1 fi - # Validate exit reason + local exit_reason + exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "") + exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + + # Validate exit reason (fail closed on invalid values) case "$exit_reason" in complete|stop|maxiter) ;; *) - echo "Warning: Invalid methodology exit reason '$exit_reason', defaulting to complete" >&2 - exit_reason="complete" + echo "Error: Invalid methodology exit reason '$exit_reason', blocking completion" >&2 + return 1 ;; esac From 7282f6e0729f3197a2d4cbb4175f587f083951d4 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:27:09 -0700 Subject: [PATCH 03/97] Fix read validator to allow historical round access during methodology analysis The Opus analysis agent needs to read all round-*-summary.md and round-*-review-result.md files, but the read validator was blocking access to summaries from non-current rounds. Bypass the round number check when methodology-analysis-state.md is the active state file, while still requiring files to be within the active loop directory. 
--- hooks/loop-read-validator.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..b483e15f 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -88,9 +88,18 @@ if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 fi -# Detect if we're in Finalize Phase (finalize-state.md exists) +# Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +# In Methodology Analysis Phase, allow reading all round files (summaries and review results) +# The analysis agent needs access to the full development history +if [[ "$STATE_FILE_TO_PARSE" == *"/methodology-analysis-state.md" ]]; then + # Only allow reads within the active loop directory + if [[ "$FILE_PATH" == "$ACTIVE_LOOP_DIR/"* ]]; then + exit 0 + fi +fi + # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then echo "Error: Malformed state file, blocking operation for safety" >&2 From 50e8bbbf1b8d4947da8d182228cc55df09eefecf Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:32:31 -0700 Subject: [PATCH 04/97] Harden methodology analysis read bypass with path canonicalization and allowlist Address Codex review findings: - Use realpath to canonicalize paths before prefix check, preventing directory traversal attacks (e.g., LOOP_DIR/../sensitive-file) - Restrict allowed reads to an explicit allowlist of files the analysis agent actually needs: round summaries, review results, and its own artifacts. This prevents exposing plan.md, prompt files, and other project-specific loop metadata that would undercut sanitization. 
--- hooks/loop-read-validator.sh | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index b483e15f..a948cd22 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -91,12 +91,26 @@ fi # Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") -# In Methodology Analysis Phase, allow reading all round files (summaries and review results) -# The analysis agent needs access to the full development history +# In Methodology Analysis Phase, allow reading specific analysis-related files only +# The Opus agent needs round summaries, review results, and its own artifacts if [[ "$STATE_FILE_TO_PARSE" == *"/methodology-analysis-state.md" ]]; then - # Only allow reads within the active loop directory - if [[ "$FILE_PATH" == "$ACTIVE_LOOP_DIR/"* ]]; then - exit 0 + # Canonicalize to prevent path traversal (e.g., $LOOP_DIR/../secrets) + local_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + local_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$local_real_path" ]] && [[ -n "$local_real_loop" ]] && \ + [[ "$local_real_path" == "$local_real_loop/"* ]]; then + local_basename=$(basename "$local_real_path") + # Allowlist: only files the analysis agent needs + # - round-*-summary.md: development record summaries + # - round-*-review-result.md: Codex review feedback + # - methodology-analysis-report.md: the agent's own output + # - methodology-analysis-done.md: completion marker + # - methodology-analysis-state.md: state file (for parsing) + case "$local_basename" in + round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + exit 0 + ;; + esac fi fi From f34d45f07c698d742bda15d0dc32d652a5abaf4f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:43:08 -0700 Subject: [PATCH 05/97] 
Enforce read-only project files during methodology analysis phase Add methodology analysis restrictions to all four validators: - Read validator: restrict loop dir reads to allowlisted artifacts only - Write validator: block all writes except methodology report/done marker - Edit validator: block all edits except methodology report/done marker - Bash validator: block git write commands and in-place file editing tools This prevents source code modifications after Codex has signed off and prevents project-specific information from leaking into the analysis report. --- hooks/loop-bash-validator.sh | 29 ++++++++++++++ hooks/loop-edit-validator.sh | 30 ++++++++++++++ hooks/loop-read-validator.sh | 75 +++++++++++++++++++++++------------ hooks/loop-write-validator.sh | 30 ++++++++++++++ 4 files changed, 138 insertions(+), 26 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 211e1478..a39bd9ff 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -97,6 +97,35 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" + # ======================================== + # Methodology Analysis Phase Bash Restriction + # ======================================== + # During methodology analysis, block file-modifying bash commands. + # Only gh commands and read-only operations are allowed. + # This prevents source code modifications after Codex has signed off. + + if [[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]]; then + # Allow gh commands for issue creation + if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then + exit 0 + fi + # Block git commands that modify the working tree + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then + echo "# Bash Blocked During Methodology Analysis + +Git commands that modify the working tree are not allowed during the methodology analysis phase." 
>&2 + exit 2 + fi + # Block in-place file editing tools (bypass for Write/Edit tool restriction) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install)[[:space:]]' || \ + echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then + echo "# Bash Blocked During Methodology Analysis + +File modification commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + fi + # ======================================== # Block Git Push When push_every_round is false # ======================================== diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 851ad5c0..a52604b7 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -79,6 +79,36 @@ if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then fi fi +# ======================================== +# Methodology Analysis Phase Edit Restriction +# ======================================== +# During methodology analysis, only methodology artifacts can be edited. +# This prevents source code modifications after Codex has signed off. +# This check MUST come before the humanize loop dir early exit below. 
+ +PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +_MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ + [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md) + exit 0 + ;; + esac + fi + echo "# Edit Blocked During Methodology Analysis + +During the methodology analysis phase, only methodology artifacts can be edited. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md" >&2 + exit 2 +fi + # ======================================== # Check if File is in .humanize/rlcr # ======================================== diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index a948cd22..e33554a0 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -65,6 +65,53 @@ if is_round_file_type "$FILE_PATH_LOWER" "todos"; then fi fi +# ======================================== +# Methodology Analysis Phase Read Restriction +# ======================================== +# During methodology analysis, restrict reads of files within the loop +# directory to only the artifacts the analysis agent needs. This prevents +# project-specific information from leaking into the analysis report. +# Files outside the loop directory are allowed (Claude needs system files). +# This check MUST come before the summary/prompt early exit below, +# otherwise non-summary/prompt files in the loop dir escape restriction. 
+ +PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$ACTIVE_LOOP_DIR" ]]; then + _MA_STATE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") + if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then + # Canonicalize to prevent path traversal + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ + [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + # Allowlist: only files the analysis agent needs + # - round-*-summary.md: development record summaries + # - round-*-review-result.md: review feedback + # - methodology-analysis-report.md: the agent's own output + # - methodology-analysis-done.md: completion marker + # - methodology-analysis-state.md: state file (for parsing) + case "$_ma_basename" in + round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + exit 0 + ;; + *) + echo "# Read Blocked During Methodology Analysis + +Only analysis artifacts can be read from the loop directory during this phase. 
+Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 + exit 2 + ;; + esac + fi + # Files outside loop dir are allowed (Claude needs system files to function) + exit 0 + fi +fi + # ======================================== # Check for Round Files (summary/prompt) # ======================================== @@ -80,9 +127,8 @@ IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || ec # Find Active Loop and Current Round # ======================================== -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" -LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" -ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Re-use ACTIVE_LOOP_DIR if already set by methodology analysis check above +ACTIVE_LOOP_DIR="${ACTIVE_LOOP_DIR:-${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}}" if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 @@ -91,29 +137,6 @@ fi # Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") -# In Methodology Analysis Phase, allow reading specific analysis-related files only -# The Opus agent needs round summaries, review results, and its own artifacts -if [[ "$STATE_FILE_TO_PARSE" == *"/methodology-analysis-state.md" ]]; then - # Canonicalize to prevent path traversal (e.g., $LOOP_DIR/../secrets) - local_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") - local_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") - if [[ -n "$local_real_path" ]] && [[ -n "$local_real_loop" ]] && \ - [[ "$local_real_path" == "$local_real_loop/"* ]]; then - local_basename=$(basename "$local_real_path") - # Allowlist: only files the analysis agent needs - # - round-*-summary.md: development record summaries - # - round-*-review-result.md: Codex review feedback - # - methodology-analysis-report.md: the agent's own output - # - methodology-analysis-done.md: completion marker - # - 
methodology-analysis-state.md: state file (for parsing) - case "$local_basename" in - round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) - exit 0 - ;; - esac - fi -fi - # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then echo "Error: Malformed state file, blocking operation for safety" >&2 diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 633caf73..4b7fca83 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -96,6 +96,36 @@ if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then fi fi +# ======================================== +# Methodology Analysis Phase Write Restriction +# ======================================== +# During methodology analysis, only methodology artifacts can be written. +# This prevents source code modifications after Codex has signed off. +# This check MUST come before the file type early exits below. + +PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +_MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ + [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md) + exit 0 + ;; + esac + fi + echo "# Write Blocked During Methodology Analysis + +During the methodology analysis phase, only methodology artifacts can be written. 
+Allowed: methodology-analysis-report.md, methodology-analysis-done.md" >&2 + exit 2 +fi + # ======================================== # Determine File Types # ======================================== From 87687a2ccf563c0bb51b920acfd38c4a4fbb072d Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:49:39 -0700 Subject: [PATCH 06/97] Fix spawned agent bypass and broaden bash restriction coverage All four validators now try unfiltered loop search when session-filtered search returns empty, so spawned agents (with different session_id) are also subject to methodology analysis restrictions. Bash validator now blocks: touch, mv, cp, rm, dd, truncate, chmod, chown, output redirection to non-/dev/ paths, and all git write commands. --- hooks/loop-bash-validator.sh | 79 ++++++++++++++++++++++------------- hooks/loop-edit-validator.sh | 6 +++ hooks/loop-read-validator.sh | 13 ++++-- hooks/loop-write-validator.sh | 6 +++ 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index a39bd9ff..27fe3858 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -64,6 +64,56 @@ ACTIVE_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") PR_LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") +# ======================================== +# Methodology Analysis Phase Bash Restriction +# ======================================== +# During methodology analysis, block file-modifying bash commands. +# Only gh commands and read-only operations are allowed. +# This prevents source code modifications after Codex has signed off. +# Uses unfiltered search to also apply to spawned agents with different session_id. 
+ +_MA_BASH_DIR="$ACTIVE_LOOP_DIR" +if [[ -z "$_MA_BASH_DIR" ]]; then + _MA_BASH_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + +if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then + # Allow gh commands for issue creation + if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then + exit 0 + fi + # Block git commands that modify the working tree + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then + echo "# Bash Blocked During Methodology Analysis + +Git write commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block file manipulation commands (touch, mv, cp, rm, etc.) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +File modification commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block in-place file editing tools + if echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then + echo "# Bash Blocked During Methodology Analysis + +In-place file editing is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block output redirection to files (catches cat > file, echo > file, etc.) + # Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > + _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') + if echo "$_ma_stripped" | grep -qE '[>]'; then + echo "# Bash Blocked During Methodology Analysis + +File redirection is not allowed during the methodology analysis phase." 
>&2 + exit 2 + fi +fi + # If no active loop of either type, allow all commands if [[ -z "$ACTIVE_LOOP_DIR" ]] && [[ -z "$ACTIVE_PR_LOOP_DIR" ]]; then exit 0 @@ -97,35 +147,6 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" - # ======================================== - # Methodology Analysis Phase Bash Restriction - # ======================================== - # During methodology analysis, block file-modifying bash commands. - # Only gh commands and read-only operations are allowed. - # This prevents source code modifications after Codex has signed off. - - if [[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]]; then - # Allow gh commands for issue creation - if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then - exit 0 - fi - # Block git commands that modify the working tree - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then - echo "# Bash Blocked During Methodology Analysis - -Git commands that modify the working tree are not allowed during the methodology analysis phase." >&2 - exit 2 - fi - # Block in-place file editing tools (bypass for Write/Edit tool restriction) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install)[[:space:]]' || \ - echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then - echo "# Bash Blocked During Methodology Analysis - -File modification commands are not allowed during the methodology analysis phase." 
>&2 - exit 2 - fi - fi - # ======================================== # Block Git Push When push_every_round is false # ======================================== diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index a52604b7..52b7ec66 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -90,6 +90,12 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Spawned agents (e.g., Opus analysis agent) have a different session_id. +# Try unfiltered search to detect methodology analysis phase for them. +if [[ -z "$_MA_LOOP_DIR" ]]; then + _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index e33554a0..efb66e4c 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -79,12 +79,19 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -if [[ -n "$ACTIVE_LOOP_DIR" ]]; then - _MA_STATE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +# Spawned agents (e.g., Opus analysis agent) have a different session_id. +# Try unfiltered search to detect methodology analysis phase for them. 
+_MA_CHECK_DIR="$ACTIVE_LOOP_DIR" +if [[ -z "$_MA_CHECK_DIR" ]]; then + _MA_CHECK_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + +if [[ -n "$_MA_CHECK_DIR" ]]; then + _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then # Canonicalize to prevent path traversal _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") - _ma_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 4b7fca83..9ce8cdf8 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -107,6 +107,12 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Spawned agents (e.g., Opus analysis agent) have a different session_id. +# Try unfiltered search to detect methodology analysis phase for them. +if [[ -z "$_MA_LOOP_DIR" ]]; then + _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") From 9888044b0adac29990a56788916cea6af8b673a5 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:53:31 -0700 Subject: [PATCH 07/97] Fix realpath for non-existent files and narrow gh allowlist Handle BSD/macOS where realpath fails for non-existent files by resolving the parent directory and appending the basename. This allows the initial Write to methodology-analysis-report.md to succeed. 
Narrow the bash gh allowlist from all gh commands to only gh issue subcommands, preventing workspace mutations via gh pr checkout, gh repo clone, or mutating gh api calls. --- hooks/loop-bash-validator.sh | 5 +++-- hooks/loop-edit-validator.sh | 5 +++++ hooks/loop-read-validator.sh | 5 +++++ hooks/loop-write-validator.sh | 5 +++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 27fe3858..285df8e1 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -78,8 +78,9 @@ if [[ -z "$_MA_BASH_DIR" ]]; then fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Allow gh commands for issue creation - if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then + # Allow only gh issue commands (for optional feedback issue creation) + # Block other gh subcommands (pr checkout, repo clone, api, etc.) + if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]]+issue[[:space:]] ]]; then exit 0 fi # Block git commands that modify the working tree diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 52b7ec66..685b44a8 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -97,7 +97,12 @@ if [[ -z "$_MA_LOOP_DIR" ]]; then fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then diff --git a/hooks/loop-read-validator.sh 
b/hooks/loop-read-validator.sh index efb66e4c..428a0bec 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -90,7 +90,12 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then # Canonicalize to prevent path traversal + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 9ce8cdf8..775bccbd 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -114,7 +114,12 @@ if [[ -z "$_MA_LOOP_DIR" ]]; then fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then From 4c48209459159b8521bdd4db459d2780b6898b41 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:59:47 -0700 Subject: [PATCH 08/97] Block project-root reads and interpreters during methodology analysis Read validator now blocks reads of files within the project root 
(not just loop dir) during methodology analysis, while still allowing system files outside the project (CLAUDE.md, configs). This prevents the analysis agent from accessing source code that could leak into the report. Bash validator now blocks common interpreter commands (python, ruby, node, perl, php) during methodology analysis as defense-in-depth against file write bypasses. --- hooks/loop-bash-validator.sh | 7 +++++++ hooks/loop-read-validator.sh | 16 +++++++++++++++- hooks/loop-write-validator.sh | 3 +++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 285df8e1..f9477520 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -104,6 +104,13 @@ File modification commands are not allowed during the methodology analysis phase In-place file editing is not allowed during the methodology analysis phase." >&2 exit 2 fi + # Block common interpreters that could write files (defense-in-depth) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(python[23]?|ruby|node|perl|php)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Running interpreters is not allowed during the methodology analysis phase." >&2 + exit 2 + fi # Block output redirection to files (catches cat > file, echo > file, etc.) 
# Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 428a0bec..4bbf55e0 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -119,7 +119,21 @@ Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md ;; esac fi - # Files outside loop dir are allowed (Claude needs system files to function) + # Files within the project root are blocked (project-specific information) + # Files outside the project root are allowed (system files, config, etc.) + _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "") + if [[ -n "$_ma_project_real" ]]; then + _ma_path_check="${_ma_real_path:-$FILE_PATH}" + if [[ "$_ma_path_check" == "$_ma_project_real/"* ]] || \ + [[ "$_ma_path_check" == "$PROJECT_ROOT/"* ]]; then + echo "# Read Blocked During Methodology Analysis + +Reading project files is not allowed during the methodology analysis phase. +Only analysis artifacts within the loop directory can be read. +Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 + exit 2 + fi + fi exit 0 fi fi diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 775bccbd..8fda456c 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -109,6 +109,9 @@ _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID" # Spawned agents (e.g., Opus analysis agent) have a different session_id. # Try unfiltered search to detect methodology analysis phase for them. +# Note: This may briefly affect concurrent sessions in the same repo, but +# methodology analysis is short-lived and this ensures spawned agents +# cannot bypass the write freeze after Codex has signed off. 
if [[ -z "$_MA_LOOP_DIR" ]]; then _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") fi From 786ed3259fb11b91c7e90f4dbddf9bfb1804741b Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:09:02 -0700 Subject: [PATCH 09/97] Block shell entry points and fix concurrent session binding in methodology analysis Add find_methodology_analysis_loop() that scans all loop directories for methodology-analysis-state.md instead of using the unfiltered find_active_loop fallback which only returns the newest active loop. This prevents spawned agents from binding to a wrong concurrent session during methodology analysis. Block shell script entry points (bash/sh/zsh, build tools, source/dot commands, direct script execution) in the bash validator during methodology analysis to prevent bypassing file modification restrictions via wrapper binaries. --- hooks/lib/loop-common.sh | 32 ++++++++++++++++++++++++++++++++ hooks/loop-bash-validator.sh | 35 +++++++++++++++++++++++++++++++++-- hooks/loop-edit-validator.sh | 7 ++++--- hooks/loop-read-validator.sh | 7 ++++--- hooks/loop-write-validator.sh | 10 ++++------ 5 files changed, 77 insertions(+), 14 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index ea8f62c7..8b465f5d 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -332,6 +332,38 @@ find_active_loop() { echo "" } +# Find any active loop directory currently in methodology analysis phase. +# Unlike find_active_loop() which returns the newest active loop (possibly the +# wrong one when multiple concurrent sessions exist), this function specifically +# searches for a loop with methodology-analysis-state.md present. +# This ensures spawned agents (which have different session_ids) always bind to +# the correct originating loop during methodology analysis. 
+# +# Args: +# $1 - loop_base_dir: path to .humanize/rlcr +# +# Outputs the directory path to stdout, or empty string if none found +find_methodology_analysis_loop() { + local loop_base_dir="$1" + + if [[ ! -d "$loop_base_dir" ]]; then + echo "" + return + fi + + local dir + while IFS= read -r dir; do + [[ -z "$dir" ]] && continue + local trimmed_dir="${dir%/}" + if [[ -f "$trimmed_dir/methodology-analysis-state.md" ]]; then + echo "$trimmed_dir" + return + fi + done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) + + echo "" +} + # Extract current round number from state.md # Outputs the round number to stdout, defaults to 0 # Note: For full state parsing, use parse_state_file() instead diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index f9477520..62323f46 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -73,8 +73,11 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Uses unfiltered search to also apply to spawned agents with different session_id. _MA_BASH_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_BASH_DIR" ]]; then - _MA_BASH_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +if [[ -z "$_MA_BASH_DIR" ]] || [[ ! -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then + # Spawned agents have a different session_id, so session-filtered search may + # miss the originating loop. Use targeted search that scans ALL loops for + # methodology-analysis-state.md to avoid binding to a wrong concurrent session. + _MA_BASH_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then @@ -111,6 +114,34 @@ In-place file editing is not allowed during the methodology analysis phase." >&2 Running interpreters is not allowed during the methodology analysis phase." >&2 exit 2 fi + # Block shell script entry points (bash script.sh, sh script.sh, source, .) 
+ if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(/usr/bin/env[[:space:]]+)?(bash|sh|zsh|/bin/bash|/bin/sh|/bin/zsh)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Running shell scripts is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block build tools that execute arbitrary commands + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(make|cmake|ninja|gradle|mvn|ant|cargo|go[[:space:]]+run|go[[:space:]]+generate|npm[[:space:]]+run|yarn[[:space:]]+run|npx|pnpm)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Build tools are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block source/dot commands (source script.sh, . script.sh) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(source|\.)[ ]+[^[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Sourcing scripts is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block direct script execution (./script.sh, ../script.sh, /path/to/script) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])\.{0,2}/[^[:space:]>|&;]*\.(sh|bash|py|rb|pl|js)'; then + echo "# Bash Blocked During Methodology Analysis + +Direct script execution is not allowed during the methodology analysis phase." >&2 + exit 2 + fi # Block output redirection to files (catches cat > file, echo > file, etc.) 
# Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 685b44a8..89604d37 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -91,9 +91,10 @@ LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" # Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Try unfiltered search to detect methodology analysis phase for them. -if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +# Use targeted search that scans ALL loops for methodology-analysis-state.md +# to avoid binding to a wrong concurrent session. +if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 4bbf55e0..2229ff94 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -80,10 +80,11 @@ LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" # Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Try unfiltered search to detect methodology analysis phase for them. +# Use targeted search that scans ALL loops for methodology-analysis-state.md +# to avoid binding to a wrong concurrent session. _MA_CHECK_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_CHECK_DIR" ]]; then - _MA_CHECK_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +if [[ -z "$_MA_CHECK_DIR" ]] || [[ ! 
-f "$_MA_CHECK_DIR/methodology-analysis-state.md" ]]; then + _MA_CHECK_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_CHECK_DIR" ]]; then diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 8fda456c..2831f428 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -108,12 +108,10 @@ LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" # Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Try unfiltered search to detect methodology analysis phase for them. -# Note: This may briefly affect concurrent sessions in the same repo, but -# methodology analysis is short-lived and this ensures spawned agents -# cannot bypass the write freeze after Codex has signed off. -if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +# Use targeted search that scans ALL loops for methodology-analysis-state.md +# to avoid binding to a wrong concurrent session. +if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then From f2a12c6947ec187d43ae67f9303cbea1dac5a3ab Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:13:58 -0700 Subject: [PATCH 10/97] Allow cancel script during methodology analysis, remove gh issue early exit Remove the gh issue early exit that short-circuited all subsequent methodology analysis checks. Commands like 'gh issue create; rm file' were bypassing blockers. Now all commands go through the full blocklist; pure gh issue commands pass naturally since they match no blocker. Add cancel-rlcr-loop.sh to the allowlist so the cancel command works during the methodology analysis phase. 
Document concurrent methodology analysis limitation in find_methodology_analysis_loop. --- hooks/lib/loop-common.sh | 6 ++++++ hooks/loop-bash-validator.sh | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 8b465f5d..6405c4b9 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -339,6 +339,12 @@ find_active_loop() { # This ensures spawned agents (which have different session_ids) always bind to # the correct originating loop during methodology analysis. # +# Limitation: If two loops are simultaneously in methodology analysis, this +# returns the newest one. The older session's spawned agents would bind to the +# wrong loop. This is accepted because concurrent methodology analyses are +# extremely unlikely (the phase is short-lived and requires two active RLCR +# sessions to overlap at this specific point). +# # Args: # $1 - loop_base_dir: path to .humanize/rlcr # diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 62323f46..3f97c775 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -81,9 +81,8 @@ if [[ -z "$_MA_BASH_DIR" ]] || [[ ! -f "$_MA_BASH_DIR/methodology-analysis-state fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Allow only gh issue commands (for optional feedback issue creation) - # Block other gh subcommands (pr checkout, repo clone, api, etc.) 
- if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]]+issue[[:space:]] ]]; then + # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh'; then exit 0 fi # Block git commands that modify the working tree From 7b91c4701b0bf685e059dab9aaec28228db4e1f7 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:20:17 -0700 Subject: [PATCH 11/97] Scope methodology fallback to spawned agents only, add missing git subcommands Only fall back to find_methodology_analysis_loop when NO session-matched loop was found (spawned agent case). If a session has its own active loop, do NOT search for another session's methodology analysis state -- that would apply restrictions to an unrelated concurrent session. Add git restore, clean, rm, mv to the methodology analysis git command blocklist to prevent working tree modifications after Codex signoff. --- hooks/loop-bash-validator.sh | 10 +++++----- hooks/loop-edit-validator.sh | 8 ++++---- hooks/loop-read-validator.sh | 8 ++++---- hooks/loop-write-validator.sh | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 3f97c775..28c30567 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -73,10 +73,10 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Uses unfiltered search to also apply to spawned agents with different session_id. _MA_BASH_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_BASH_DIR" ]] || [[ ! -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Spawned agents have a different session_id, so session-filtered search may - # miss the originating loop. Use targeted search that scans ALL loops for - # methodology-analysis-state.md to avoid binding to a wrong concurrent session. 
+if [[ -z "$_MA_BASH_DIR" ]]; then + # Only fall back when NO session-matched loop was found (spawned agent case). + # If the session has its own active loop, do NOT search for another session's + # methodology analysis -- that would incorrectly restrict the current session. _MA_BASH_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi @@ -86,7 +86,7 @@ if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.m exit 0 fi # Block git commands that modify the working tree - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv)'; then echo "# Bash Blocked During Methodology Analysis Git write commands are not allowed during the methodology analysis phase." >&2 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 89604d37..5f6bed92 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -90,10 +90,10 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Use targeted search that scans ALL loops for methodology-analysis-state.md -# to avoid binding to a wrong concurrent session. -if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then +# Only fall back when NO session-matched loop was found (spawned agent case). +# If the session has its own active loop, do NOT search for another session's +# methodology analysis -- that would incorrectly restrict the current session. 
+if [[ -z "$_MA_LOOP_DIR" ]]; then _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 2229ff94..6d9c6c38 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -79,11 +79,11 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Use targeted search that scans ALL loops for methodology-analysis-state.md -# to avoid binding to a wrong concurrent session. +# Only fall back when NO session-matched loop was found (spawned agent case). +# If the session has its own active loop, do NOT search for another session's +# methodology analysis -- that would incorrectly restrict the current session. _MA_CHECK_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_CHECK_DIR" ]] || [[ ! -f "$_MA_CHECK_DIR/methodology-analysis-state.md" ]]; then +if [[ -z "$_MA_CHECK_DIR" ]]; then _MA_CHECK_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 2831f428..0239a9e3 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -107,10 +107,10 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Use targeted search that scans ALL loops for methodology-analysis-state.md -# to avoid binding to a wrong concurrent session. -if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then +# Only fall back when NO session-matched loop was found (spawned agent case). 
+# If the session has its own active loop, do NOT search for another session's +# methodology analysis -- that would incorrectly restrict the current session. +if [[ -z "$_MA_LOOP_DIR" ]]; then _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi From 3c8533258a519b03a1d944afc94067678069a8f6 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:26:17 -0700 Subject: [PATCH 12/97] Remove cross-session methodology fallback, add realpath raw path fallback Remove unfiltered find_methodology_analysis_loop fallback from all validators. The fallback incorrectly applied methodology analysis restrictions to unrelated sessions opened in the same repo. Now only the originating session (matched by session_id) gets restricted. Spawned agents rely on their prompt for guidance. Add raw path fallback when realpath is unavailable (older macOS/BSD) to prevent deadlock where the originating session cannot write completion artifacts. --- hooks/loop-bash-validator.sh | 9 +++------ hooks/loop-edit-validator.sh | 16 +++++++--------- hooks/loop-read-validator.sh | 18 ++++++++---------- hooks/loop-write-validator.sh | 16 +++++++--------- 4 files changed, 25 insertions(+), 34 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 28c30567..282e6c32 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -72,13 +72,10 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # This prevents source code modifications after Codex has signed off. # Uses unfiltered search to also apply to spawned agents with different session_id. +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Spawned agents (with different session_ids) are guided by their prompt instead. 
_MA_BASH_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_BASH_DIR" ]]; then - # Only fall back when NO session-matched loop was found (spawned agent case). - # If the session has its own active loop, do NOT search for another session's - # methodology analysis -- that would incorrectly restrict the current session. - _MA_BASH_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 5f6bed92..3b5becde 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -88,15 +88,11 @@ fi PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Spawned agents (with different session_ids) are guided by their prompt instead. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Only fall back when NO session-matched loop was found (spawned agent case). -# If the session has its own active loop, do NOT search for another session's -# methodology analysis -- that would incorrectly restrict the current session. 
-if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi - if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") @@ -105,8 +101,10 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") - if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ - [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" + [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in methodology-analysis-report.md|methodology-analysis-done.md) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 6d9c6c38..bddb4776 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -77,15 +77,11 @@ fi PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Spawned agents (with different session_ids) are guided by their prompt instead. ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" - -# Only fall back when NO session-matched loop was found (spawned agent case). -# If the session has its own active loop, do NOT search for another session's -# methodology analysis -- that would incorrectly restrict the current session. 
_MA_CHECK_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_CHECK_DIR" ]]; then - _MA_CHECK_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi if [[ -n "$_MA_CHECK_DIR" ]]; then _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") @@ -98,8 +94,10 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") - if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ - [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" + [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_CHECK_DIR" + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") # Allowlist: only files the analysis agent needs # - round-*-summary.md: development record summaries @@ -122,7 +120,7 @@ Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md fi # Files within the project root are blocked (project-specific information) # Files outside the project root are allowed (system files, config, etc.) - _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "") + _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "$PROJECT_ROOT") if [[ -n "$_ma_project_real" ]]; then _ma_path_check="${_ma_real_path:-$FILE_PATH}" if [[ "$_ma_path_check" == "$_ma_project_real/"* ]] || \ diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 0239a9e3..5b116391 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -105,15 +105,11 @@ fi PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. 
+# Spawned agents (with different session_ids) are guided by their prompt instead. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Only fall back when NO session-matched loop was found (spawned agent case). -# If the session has its own active loop, do NOT search for another session's -# methodology analysis -- that would incorrectly restrict the current session. -if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi - if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") @@ -122,8 +118,10 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") - if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ - [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" + [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in methodology-analysis-report.md|methodology-analysis-done.md) From 5c123c2e1208b56c21532bd492e481ee964ee7a5 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:33:06 -0700 Subject: [PATCH 13/97] Expand methodology bash blocklist, require non-empty report, fix monitor status Add git switch/pull/clone/submodule/worktree and mkdir/rmdir/ln/mktemp to the methodology analysis bash command blocklist. Require methodology-analysis-report.md to have content (not just exist) before allowing the methodology analysis phase to complete. 
Add methodology-analysis-state.md to statusline state file resolution and show "Analyzing" status. Treat methodology-analysis as active phase in monitor color. --- hooks/lib/methodology-analysis.sh | 9 ++++++++- hooks/loop-bash-validator.sh | 6 +++--- scripts/lib/monitor-common.sh | 2 +- scripts/statusline.sh | 8 ++++++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index adbde197..a7743410 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -126,11 +126,18 @@ complete_methodology_analysis() { return 1 fi - # Require the analysis report to exist (ensures the Opus agent actually ran) + # Require the analysis report to exist with content (ensures the Opus agent + # actually produced an analysis, not just an empty/truncated file) if [[ ! -f "$report_file" ]]; then echo "Warning: methodology-analysis-report.md missing, blocking completion" >&2 return 1 fi + local report_content + report_content=$(cat "$report_file" 2>/dev/null || echo "") + if [[ -z "$report_content" ]]; then + echo "Warning: methodology-analysis-report.md is empty, blocking completion" >&2 + return 1 + fi # Read exit reason (fail closed: missing marker blocks completion) if [[ ! 
-f "$LOOP_DIR/.methodology-exit-reason" ]]; then diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 282e6c32..1f95d18c 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -83,14 +83,14 @@ if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.m exit 0 fi # Block git commands that modify the working tree - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv)'; then + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv|switch|pull|clone|submodule|worktree)'; then echo "# Bash Blocked During Methodology Analysis Git write commands are not allowed during the methodology analysis phase." >&2 exit 2 fi - # Block file manipulation commands (touch, mv, cp, rm, etc.) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown)[[:space:]]'; then + # Block file manipulation commands (touch, mv, cp, rm, mkdir, ln, etc.) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown|mkdir|rmdir|ln|mktemp)[[:space:]]'; then echo "# Bash Blocked During Methodology Analysis File modification commands are not allowed during the methodology analysis phase." 
>&2 diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 405b25fd..5eb606d5 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -130,7 +130,7 @@ monitor_restore_terminal() { monitor_get_status_color() { local status="$1" case "$status" in - active) echo "\033[1;32m" ;; # green + active|methodology-analysis) echo "\033[1;32m" ;; # green completed) echo "\033[1;36m" ;; # cyan failed|error|timeout) echo "\033[1;31m" ;; # red cancelled) echo "\033[1;33m" ;; # yellow diff --git a/scripts/statusline.sh b/scripts/statusline.sh index b6430052..dafc0bdf 100755 --- a/scripts/statusline.sh +++ b/scripts/statusline.sh @@ -37,7 +37,9 @@ format_duration() { _resolve_rlcr_display() { local session_dir="$1" - if [[ -f "$session_dir/finalize-state.md" ]]; then + if [[ -f "$session_dir/methodology-analysis-state.md" ]]; then + echo "Analyzing" + elif [[ -f "$session_dir/finalize-state.md" ]]; then echo "Finalizing" elif [[ -f "$session_dir/state.md" ]]; then echo "Active" @@ -95,7 +97,9 @@ get_rlcr_status() { [[ -z "$dir" ]] && continue local trimmed="${dir%/}" local any_state="" - if [[ -f "$trimmed/finalize-state.md" ]]; then + if [[ -f "$trimmed/methodology-analysis-state.md" ]]; then + any_state="$trimmed/methodology-analysis-state.md" + elif [[ -f "$trimmed/finalize-state.md" ]]; then any_state="$trimmed/finalize-state.md" elif [[ -f "$trimmed/state.md" ]]; then any_state="$trimmed/state.md" From 03ac7d0e72c469ac0f82e7b28cdc7da4fb27ce9d Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:36:56 -0700 Subject: [PATCH 14/97] Tighten cancel allowlist, document spawned agent limitation, remove dead code Require cancel-rlcr-loop.sh to be a standalone command (no shell operators) to prevent chained commands from bypassing methodology analysis restrictions. Document that spawned agents are not restricted by hooks due to session_id mismatch -- their sanitization is enforced by the analysis prompt. 
This is an inherent limitation of the hook architecture. Remove unused find_methodology_analysis_loop function. --- hooks/lib/loop-common.sh | 37 ----------------------------------- hooks/loop-bash-validator.sh | 14 ++++++++----- hooks/loop-edit-validator.sh | 3 ++- hooks/loop-read-validator.sh | 3 ++- hooks/loop-write-validator.sh | 3 ++- 5 files changed, 15 insertions(+), 45 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 6405c4b9..21813aca 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -332,43 +332,6 @@ find_active_loop() { echo "" } -# Find any active loop directory currently in methodology analysis phase. -# Unlike find_active_loop() which returns the newest active loop (possibly the -# wrong one when multiple concurrent sessions exist), this function specifically -# searches for a loop with methodology-analysis-state.md present. -# This ensures spawned agents (which have different session_ids) always bind to -# the correct originating loop during methodology analysis. -# -# Limitation: If two loops are simultaneously in methodology analysis, this -# returns the newest one. The older session's spawned agents would bind to the -# wrong loop. This is accepted because concurrent methodology analyses are -# extremely unlikely (the phase is short-lived and requires two active RLCR -# sessions to overlap at this specific point). -# -# Args: -# $1 - loop_base_dir: path to .humanize/rlcr -# -# Outputs the directory path to stdout, or empty string if none found -find_methodology_analysis_loop() { - local loop_base_dir="$1" - - if [[ ! 
-d "$loop_base_dir" ]]; then - echo "" - return - fi - - local dir - while IFS= read -r dir; do - [[ -z "$dir" ]] && continue - local trimmed_dir="${dir%/}" - if [[ -f "$trimmed_dir/methodology-analysis-state.md" ]]; then - echo "$trimmed_dir" - return - fi - done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) - - echo "" -} # Extract current round number from state.md # Outputs the round number to stdout, defaults to 0 diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 1f95d18c..0192bf62 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -68,18 +68,22 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Methodology Analysis Phase Bash Restriction # ======================================== # During methodology analysis, block file-modifying bash commands. -# Only gh commands and read-only operations are allowed. +# Only read-only operations and cancel-rlcr-loop.sh are allowed. # This prevents source code modifications after Codex has signed off. -# Uses unfiltered search to also apply to spawned agents with different session_id. - +# # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. This is an inherent +# limitation of the hook architecture which cannot distinguish spawned agents +# from unrelated sessions. 
_MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh'; then + # Only allow standalone invocation -- reject if chained with shell operators + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh' && \ + ! echo "$COMMAND_LOWER" | grep -qE '[;|&]'; then exit 0 fi # Block git commands that modify the working tree diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 3b5becde..de5ac333 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -90,7 +90,8 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index bddb4776..ee33ac1d 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -79,7 +79,8 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. 
-# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" _MA_CHECK_DIR="$ACTIVE_LOOP_DIR" diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 5b116391..45cb41c5 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -107,7 +107,8 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then From 5a083299c1a5d9621ff4717299f00e0f362561be Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:41:56 -0700 Subject: [PATCH 15/97] Remove raw records from read allowlist, document bash read limitation Remove round-*-summary.md and round-*-review-result.md from the methodology analysis Read allowlist. The originating session should only read the sanitized methodology-analysis-report.md, not raw development records that contain project-specific information. The spawned agent reads raw records directly (not restricted by hooks due to different session_id). Document that read-only bash commands are intentionally not blocked during methodology analysis -- blocking them would break basic operations. 
The analysis prompt is the primary enforcement mechanism for sanitization. --- hooks/loop-bash-validator.sh | 13 +++++++++---- hooks/loop-read-validator.sh | 15 ++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 0192bf62..a69ce625 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -71,12 +71,17 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Only read-only operations and cancel-rlcr-loop.sh are allowed. # This prevents source code modifications after Codex has signed off. # +# Accepted limitations: +# - Read-only bash commands (cat, grep, find, etc.) are NOT blocked. Blocking +# them would break basic Claude operations. The analysis prompt directs Claude +# to derive user-facing content only from methodology-analysis-report.md. +# - Spawned agents (different session_id) are not restricted by hooks; their +# sanitization is enforced by the analysis prompt. This is an inherent +# limitation of the hook architecture which cannot distinguish spawned agents +# from unrelated sessions. +# # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Limitation: Spawned agents (different session_id) are not restricted by hooks; -# their sanitization is enforced by the analysis prompt. This is an inherent -# limitation of the hook architecture which cannot distinguish spawned agents -# from unrelated sessions. 
_MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index ee33ac1d..4c8a7f21 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -100,14 +100,15 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_CHECK_DIR" if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") - # Allowlist: only files the analysis agent needs - # - round-*-summary.md: development record summaries - # - round-*-review-result.md: review feedback - # - methodology-analysis-report.md: the agent's own output - # - methodology-analysis-done.md: completion marker - # - methodology-analysis-state.md: state file (for parsing) + # Allowlist: only methodology artifacts (not raw development records). + # Raw records (round-*-summary.md, round-*-review-result.md) are + # intentionally excluded so the originating session cannot read + # project-specific content and must rely solely on the sanitized + # methodology-analysis-report.md for all user-facing output. + # The spawned Opus agent reads raw records directly (not restricted + # by hooks due to different session_id -- see limitation comment above). case "$_ma_basename" in - round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) exit 0 ;; *) From 2cba950a862e496c64b68ac9e735519acd8fcb0e Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:47:07 -0700 Subject: [PATCH 16/97] Fix stale error messages in read validator methodology analysis block Update blocked-read messages to match actual allowlist (methodology-analysis-* files only). 
Previous messages incorrectly listed round-*-summary.md and round-*-review-result.md which were removed from the allowlist. --- hooks/loop-read-validator.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 4c8a7f21..110b66a1 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -114,8 +114,8 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then *) echo "# Read Blocked During Methodology Analysis -Only analysis artifacts can be read from the loop directory during this phase. -Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 +Only methodology artifacts can be read from the loop directory during this phase. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2 exit 2 ;; esac @@ -130,8 +130,8 @@ Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md echo "# Read Blocked During Methodology Analysis Reading project files is not allowed during the methodology analysis phase. -Only analysis artifacts within the loop directory can be read. -Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 +Only methodology artifacts within the loop directory can be read. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2 exit 2 fi fi From 3eed7d69e17e683530ae178663acebf6466b9efd Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:53:48 -0700 Subject: [PATCH 17/97] Add missing template for methodology analysis state file block message The template was referenced in loop-common.sh but never created, causing the CI template-references test to fail. 
--- .../methodology-analysis-state-file-modification.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 prompt-template/block/methodology-analysis-state-file-modification.md diff --git a/prompt-template/block/methodology-analysis-state-file-modification.md b/prompt-template/block/methodology-analysis-state-file-modification.md new file mode 100644 index 00000000..16020c14 --- /dev/null +++ b/prompt-template/block/methodology-analysis-state-file-modification.md @@ -0,0 +1,9 @@ +# Methodology Analysis State File Modification Blocked + +You cannot modify `methodology-analysis-state.md`. This file is managed by the loop system during the Methodology Analysis Phase. + +The Methodology Analysis Phase runs before the loop fully exits. Focus on: +1. Spawning an Opus agent to analyze development records +2. Reviewing the sanitized analysis report +3. Optionally helping the user file a GitHub issue with improvement suggestions +4. Writing your completion marker to `methodology-analysis-done.md` From 13a47fb2260667a272b448e8d3c1a521f2382590 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 23:02:02 -0700 Subject: [PATCH 18/97] Fix cancel allowlist regex and move methodology handler before git-clean The cancel-rlcr-loop.sh allowlist in the bash validator matched when the script name appeared as an argument to another command (e.g. cp). Anchor the regex to the start of the command string so only direct invocations are allowed. Move the methodology analysis completion handler in the stop hook to run before the git-clean check. Writing methodology artifacts can make the working tree appear dirty when .humanize is tracked, which would block exit before the handler ever ran. 
--- hooks/loop-bash-validator.sh | 6 ++--- hooks/loop-codex-stop-hook.sh | 41 +++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index a69ce625..a991dde5 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -85,9 +85,9 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") _MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) - # Only allow standalone invocation -- reject if chained with shell operators - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh' && \ + # Allow cancel-rlcr-loop.sh only as the leading command (not as an argument + # to another command like cp/mv). Reject if chained with shell operators. + if echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*("?[^"]*/?)?cancel-rlcr-loop\.sh' && \ ! echo "$COMMAND_LOWER" | grep -qE '[;|&]'; then exit 0 fi diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 9339713d..191d2381 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -582,6 +582,28 @@ Split these into smaller modules before continuing." fi fi +# ======================================== +# Methodology Analysis Phase Completion Handler +# ======================================== +# When in methodology analysis phase, check if the analysis is done. +# If done, rename state to the original exit reason's terminal state. +# If not done, block and ask Claude to complete the analysis. +# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. +# IMPORTANT: This MUST run before the git-clean check, because methodology +# artifacts (.humanize/rlcr/...) 
may make the working tree appear dirty +# if .humanize is tracked, which would block exit before reaching this handler. + +if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then + if complete_methodology_analysis; then + # Analysis complete, allow exit + exit 0 + else + # Analysis not yet complete, block + block_methodology_analysis_incomplete + exit 0 + fi +fi + # ======================================== # Quick Check: Git Clean and Pushed? # ======================================== @@ -682,25 +704,6 @@ Please push before exiting." fi fi -# ======================================== -# Methodology Analysis Phase Completion Handler -# ======================================== -# When in methodology analysis phase, check if the analysis is done. -# If done, rename state to the original exit reason's terminal state. -# If not done, block and ask Claude to complete the analysis. -# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. - -if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then - if complete_methodology_analysis; then - # Analysis complete, allow exit - exit 0 - else - # Analysis not yet complete, block - block_methodology_analysis_incomplete - exit 0 - fi -fi - # ======================================== # Check Summary File Exists # ======================================== From 9b961a83365863d39d9c87a33f0ce0004cf0ed46 Mon Sep 17 00:00:00 2001 From: tastynoob <934348725@qq.com> Date: Sun, 15 Mar 2026 14:56:07 +0800 Subject: [PATCH 19/97] Harden RLCR against mainline drift --- hooks/check-todos-from-transcript.py | 29 +- hooks/lib/loop-common.sh | 235 +++++++++++++- hooks/loop-bash-validator.sh | 24 +- hooks/loop-codex-stop-hook.sh | 283 ++++++++++++++-- hooks/loop-edit-validator.sh | 63 +++- hooks/loop-read-validator.sh | 55 +++- hooks/loop-write-validator.sh | 77 +++-- .../block/finalize-contract-access.md | 7 + .../block/goal-tracker-modification.md | 29 +- prompt-template/block/mainline-drift-stop.md | 14 + 
.../block/mainline-verdict-missing.md | 13 + .../block/round-contract-bash-write.md | 7 + .../block/round-contract-missing.md | 13 + .../block/wrong-contract-location.md | 5 + prompt-template/claude/drift-replan-prompt.md | 68 ++++ .../claude/finalize-phase-prompt.md | 7 +- .../claude/finalize-phase-skipped-prompt.md | 7 +- .../claude/goal-tracker-update-request.md | 7 +- prompt-template/claude/next-round-prompt.md | 51 ++- .../claude/post-alignment-action-items.md | 1 + prompt-template/claude/review-phase-prompt.md | 37 ++- .../codex/full-alignment-review.md | 29 +- .../codex/goal-tracker-update-section.md | 13 +- prompt-template/codex/regular-review.md | 21 +- scripts/humanize.sh | 77 ++++- scripts/lib/monitor-common.sh | 49 ++- scripts/setup-rlcr-loop.sh | 304 +++++++++++++++--- .../test-goal-tracker-robustness.sh | 59 ++++ .../robustness/test-hook-system-robustness.sh | 192 ++++++++++- .../test-setup-scripts-robustness.sh | 64 ++++ .../robustness/test-state-file-robustness.sh | 49 +++ tests/test-agent-teams.sh | 57 ++++ tests/test-allowlist-validators.sh | 105 +++++- tests/test-finalize-phase.sh | 209 ++++++++++++ tests/test-plan-file-hooks.sh | 63 +++- tests/test-task-tag-routing.sh | 11 + tests/test-todo-checker.sh | 81 +++++ 37 files changed, 2242 insertions(+), 173 deletions(-) create mode 100644 prompt-template/block/finalize-contract-access.md create mode 100644 prompt-template/block/mainline-drift-stop.md create mode 100644 prompt-template/block/mainline-verdict-missing.md create mode 100644 prompt-template/block/round-contract-bash-write.md create mode 100644 prompt-template/block/round-contract-missing.md create mode 100644 prompt-template/block/wrong-contract-location.md create mode 100644 prompt-template/claude/drift-replan-prompt.md diff --git a/hooks/check-todos-from-transcript.py b/hooks/check-todos-from-transcript.py index af577a5c..31ec6e5e 100755 --- a/hooks/check-todos-from-transcript.py +++ b/hooks/check-todos-from-transcript.py @@ -15,11 
+15,26 @@ echo '{"session_id": "...", "transcript_path": "/path/to/transcript.jsonl"}' | python3 check-todos-from-transcript.py """ import json +import re import sys from pathlib import Path from typing import List, Tuple +LANE_PREFIX_PATTERN = re.compile(r"^\s*\[(mainline|blocking|queued)\](?:\s|$)", re.IGNORECASE) + + +def classify_lane(*parts: str) -> str: + """Infer the task lane from content, defaulting to blocking for safety.""" + for part in parts: + if not part: + continue + match = LANE_PREFIX_PATTERN.match(part) + if match: + return match.group(1).lower() + return "blocking" + + def extract_tool_calls_from_entry(entry: dict) -> List[Tuple[str, dict]]: """ Extract tool calls from a transcript entry. @@ -92,10 +107,14 @@ def find_incomplete_todos_from_transcript(transcript_path: Path) -> List[dict]: status = todo.get("status", "") content = todo.get("content", "") if status != "completed": + lane = classify_lane(content) + if lane == "queued": + continue incomplete.append({ "status": status, "content": content, "source": "todo", + "lane": lane, }) return incomplete @@ -134,11 +153,15 @@ def find_incomplete_tasks_from_directory(session_id: str, tasks_base_dir: str = description = task.get("description", "") task_id = task_file.stem # Filename without .json content = subject or description or f"Task {task_id}" + lane = classify_lane(subject, description) + if lane == "queued": + continue incomplete.append({ "status": status, "content": content, "source": "task", "task_id": task_id, + "lane": lane, }) except (json.JSONDecodeError, OSError): # Skip malformed or unreadable task files @@ -184,11 +207,13 @@ def main(): status = item.get("status", "unknown") content = item.get("content", "") source = item.get("source", "unknown") + lane = item.get("lane", "blocking") + lane_marker = f"[{lane}]" if source == "task": task_id = item.get("task_id", "?") - output_lines.append(f" - [{status}] (Task #{task_id}) {content}") + output_lines.append(f" - [{status}] 
{lane_marker} (Task #{task_id}) {content}") else: - output_lines.append(f" - [{status}] {content}") + output_lines.append(f" - [{status}] {lane_marker} {content}") # Output marker and incomplete items both to stdout print("INCOMPLETE_TODOS") diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 5151018f..b6bc2e5b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -38,6 +38,17 @@ readonly FIELD_FULL_REVIEW_ROUND="full_review_round" readonly FIELD_ASK_CODEX_QUESTION="ask_codex_question" readonly FIELD_SESSION_ID="session_id" readonly FIELD_AGENT_TEAMS="agent_teams" +readonly FIELD_MAINLINE_STALL_COUNT="mainline_stall_count" +readonly FIELD_LAST_MAINLINE_VERDICT="last_mainline_verdict" +readonly FIELD_DRIFT_STATUS="drift_status" + +readonly MAINLINE_VERDICT_ADVANCED="advanced" +readonly MAINLINE_VERDICT_STALLED="stalled" +readonly MAINLINE_VERDICT_REGRESSED="regressed" +readonly MAINLINE_VERDICT_UNKNOWN="unknown" + +readonly DRIFT_STATUS_NORMAL="normal" +readonly DRIFT_STATUS_REPLAN_REQUIRED="replan_required" # Default Codex configuration (single source of truth - all scripts reference this) # Scripts can pre-set DEFAULT_CODEX_MODEL/DEFAULT_CODEX_EFFORT before sourcing to override. 
@@ -364,6 +375,9 @@ _parse_state_fields() { STATE_ASK_CODEX_QUESTION=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_ASK_CODEX_QUESTION}:" | sed "s/${FIELD_ASK_CODEX_QUESTION}: *//" | tr -d ' ' || true) STATE_SESSION_ID=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_SESSION_ID}:" | sed "s/${FIELD_SESSION_ID}: *//" || true) STATE_AGENT_TEAMS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_AGENT_TEAMS}:" | sed "s/${FIELD_AGENT_TEAMS}: *//" | tr -d ' ' || true) + STATE_MAINLINE_STALL_COUNT=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_MAINLINE_STALL_COUNT}:" | sed "s/${FIELD_MAINLINE_STALL_COUNT}: *//" | tr -d ' ' || true) + STATE_LAST_MAINLINE_VERDICT=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_LAST_MAINLINE_VERDICT}:" | sed "s/${FIELD_LAST_MAINLINE_VERDICT}: *//" | tr -d ' ' || true) + STATE_DRIFT_STATUS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_DRIFT_STATUS}:" | sed "s/${FIELD_DRIFT_STATUS}: *//" | tr -d ' ' || true) } # Parse state file frontmatter and set variables (tolerant mode with defaults) @@ -384,6 +398,9 @@ _parse_state_fields() { # STATE_FULL_REVIEW_ROUND - interval for Full Alignment Check (default: 5) # STATE_ASK_CODEX_QUESTION - "true" or "false" (v1.6.5+) # STATE_AGENT_TEAMS - "true" or "false" +# STATE_MAINLINE_STALL_COUNT - consecutive stalled/regressed implementation rounds +# STATE_LAST_MAINLINE_VERDICT - advanced/stalled/regressed/unknown +# STATE_DRIFT_STATUS - normal/replan_required # Returns: 0 on success, 1 if file not found # Note: For strict validation, use parse_state_file_strict() instead parse_state_file() { @@ -406,6 +423,9 @@ parse_state_file() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" # 
STATE_REVIEW_STARTED left as-is (empty if missing, to allow schema validation) return 0 @@ -481,10 +501,116 @@ parse_state_file_strict() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" return 0 } +# Normalize mainline progress verdict to a safe enum. +# Usage: normalize_mainline_progress_verdict "ADVANCED" +normalize_mainline_progress_verdict() { + local verdict_lower + verdict_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + + case "$verdict_lower" in + "$MAINLINE_VERDICT_ADVANCED"|"$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED") + echo "$verdict_lower" + ;; + *) + echo "$MAINLINE_VERDICT_UNKNOWN" + ;; + esac +} + +# Normalize drift status to a safe enum. +# Usage: normalize_drift_status "replan_required" +normalize_drift_status() { + local status_lower + status_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + + case "$status_lower" in + "$DRIFT_STATUS_REPLAN_REQUIRED") + echo "$DRIFT_STATUS_REPLAN_REQUIRED" + ;; + *) + echo "$DRIFT_STATUS_NORMAL" + ;; + esac +} + +# Extract "Mainline Progress Verdict" from Codex review content. 
+# Outputs one of: advanced, stalled, regressed, unknown +# Usage: extract_mainline_progress_verdict "$review_content" +extract_mainline_progress_verdict() { + local review_content="$1" + local verdict_line + local verdict_value + + verdict_line=$(printf '%s\n' "$review_content" | grep -Ei 'Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED)([^A-Za-z]|$)' | tail -1 || true) + if [[ -z "$verdict_line" ]]; then + echo "$MAINLINE_VERDICT_UNKNOWN" + return + fi + + verdict_value=$(printf '%s\n' "$verdict_line" | sed -E 's/.*Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED).*/\1/I') + normalize_mainline_progress_verdict "$verdict_value" +} + +# Upsert simple YAML frontmatter fields in a state file. +# Values must not contain newlines. +# Usage: upsert_state_fields "/path/to/state.md" "field=value" "other=value" +upsert_state_fields() { + local state_file="$1" + shift + + local temp_file="${state_file}.tmp.$$" + + awk -v assignments="$*" ' + BEGIN { + count = split(assignments, pairs, " "); + for (i = 1; i <= count; i++) { + split(pairs[i], kv, "="); + keys[kv[1]] = kv[2]; + order[i] = kv[1]; + } + separator_count = 0; + } + { + if ($0 == "---") { + separator_count++; + if (separator_count == 2) { + for (i = 1; i <= count; i++) { + key = order[i]; + if (!(key in seen)) { + print key ": " keys[key]; + seen[key] = 1; + } + } + } + print; + next; + } + + handled = 0; + for (i = 1; i <= count; i++) { + key = order[i]; + if ($0 ~ ("^" key ":")) { + print key ": " keys[key]; + seen[key] = 1; + handled = 1; + break; + } + } + + if (!handled) { + print; + } + } + ' "$state_file" > "$temp_file" && mv "$temp_file" "$state_file" +} + # Detect review issues from codex review log file # Returns: # 0 - issues found (caller should continue review loop) @@ -562,7 +688,7 @@ to_lower() { } # Check if a path (lowercase) matches a round file pattern -# Usage: is_round_file "$lowercase_path" "summary|prompt|todos" +# Usage: is_round_file "$lowercase_path" 
"summary|prompt|todos|contract" is_round_file_type() { local path_lower="$1" local file_type="$2" @@ -579,7 +705,7 @@ extract_round_number() { filename_lower=$(to_lower "$filename") # Use sed for portable regex extraction (works in both bash and zsh) - echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\)\.md$/\1/p' + echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\|contract\)\.md$/\1/p' } # Check if a file is in the allowlist for the active loop @@ -643,6 +769,21 @@ You cannot modify finalize-state.md. This file is managed by the loop system dur load_and_render_safe "$TEMPLATE_DIR" "block/finalize-state-file-modification.md" "$fallback" } +# Standard message for blocking round contract access during Finalize Phase +# Usage: finalize_contract_blocked_message "read" +finalize_contract_blocked_message() { + local action="$1" + local fallback="# Finalize Contract Access Blocked + +There is no active round contract during the Finalize Phase. + +Do not {{ACTION}} historical round contract files. +Use finalize-summary.md for finalize-only notes and goal-tracker.md for current state." + + load_and_render_safe "$TEMPLATE_DIR" "block/finalize-contract-access.md" "$fallback" \ + "ACTION=$action" +} + # Standard message for blocking summary file modifications via Bash # Usage: summary_bash_blocked_message "$correct_summary_path" summary_bash_blocked_message() { @@ -671,6 +812,79 @@ is_goal_tracker_path() { echo "$path_lower" | grep -qE 'goal-tracker\.md$' } +# Extract the immutable section from a goal-tracker content stream. +# Supports both current trackers (with --- separator) and older trackers +# that jump directly from IMMUTABLE SECTION to MUTABLE SECTION. 
+extract_goal_tracker_immutable_from_stream() { + awk ' + /^## IMMUTABLE SECTION[[:space:]]*$/ { capture=1 } + capture && /^## MUTABLE SECTION[[:space:]]*$/ { exit } + capture && /^---[[:space:]]*$/ { exit } + capture { print } + ' +} + +# Extract the immutable section from an on-disk goal-tracker file. +# Usage: extract_goal_tracker_immutable_from_file "/path/to/goal-tracker.md" +extract_goal_tracker_immutable_from_file() { + local tracker_file="$1" + if [[ ! -f "$tracker_file" ]]; then + return 1 + fi + extract_goal_tracker_immutable_from_stream < "$tracker_file" +} + +# Extract the immutable section from an in-memory goal-tracker string. +# Usage: extract_goal_tracker_immutable_from_text "$content" +extract_goal_tracker_immutable_from_text() { + local tracker_content="$1" + printf '%s' "$tracker_content" | extract_goal_tracker_immutable_from_stream +} + +# Check whether a proposed goal-tracker update preserves the immutable section. +# Usage: goal_tracker_mutable_update_allowed "/path/to/current.md" "$new_content" +goal_tracker_mutable_update_allowed() { + local tracker_file="$1" + local updated_content="$2" + + local current_immutable="" + local updated_immutable="" + current_immutable=$(extract_goal_tracker_immutable_from_file "$tracker_file" 2>/dev/null || true) + updated_immutable=$(extract_goal_tracker_immutable_from_text "$updated_content" 2>/dev/null || true) + + [[ -n "$current_immutable" ]] || return 1 + [[ "$current_immutable" == "$updated_immutable" ]] +} + +# Render the post-edit contents for a literal Edit operation. +# Returns non-zero if the edit preview cannot be produced. 
+# Usage: preview_edit_result "/path/to/file" "$old_string" "$new_string" "true|false" +preview_edit_result() { + local file_path="$1" + local old_string="$2" + local new_string="$3" + local replace_all="${4:-false}" + + command -v perl >/dev/null 2>&1 || return 1 + + FILE_PATH="$file_path" \ + OLD_STRING="$old_string" \ + NEW_STRING="$new_string" \ + REPLACE_ALL="$replace_all" \ + perl -0pe ' + BEGIN { + $old = $ENV{"OLD_STRING"}; + $new = $ENV{"NEW_STRING"}; + $replace_all = $ENV{"REPLACE_ALL"} eq "true"; + } + if ($replace_all) { + s/\Q$old\E/$new/g; + } else { + s/\Q$old\E/$new/; + } + ' "$file_path" +} + # Check if a path (lowercase) targets state.md is_state_file_path() { local path_lower="$1" @@ -1275,17 +1489,24 @@ command_modifies_file() { } # Standard message for blocking goal-tracker modifications after Round 0 -# Usage: goal_tracker_blocked_message "$current_round" "$summary_file_path" +# Usage: goal_tracker_blocked_message "$current_round" "$correct_goal_tracker_path" goal_tracker_blocked_message() { local current_round="$1" - local summary_file="$2" - local fallback="# Goal Tracker Modification Blocked (Round {{CURRENT_ROUND}}) + local correct_path="$2" + local fallback="# Goal Tracker Update Blocked (Round {{CURRENT_ROUND}}) + +After Round 0, you may update only the **MUTABLE SECTION** of the active goal tracker. + +Use Write or Edit on: {{CORRECT_PATH}} -After Round 0, only Codex can modify the Goal Tracker. 
Include a Goal Tracker Update Request in your summary: {{SUMMARY_FILE}}" +Rules: +- Keep the **IMMUTABLE SECTION** unchanged +- Do not modify `goal-tracker.md` via Bash +- Do not write to an old loop session's tracker" load_and_render_safe "$TEMPLATE_DIR" "block/goal-tracker-modification.md" "$fallback" \ "CURRENT_ROUND=$current_round" \ - "SUMMARY_FILE=$summary_file" + "CORRECT_PATH=$correct_path" } # End the loop by renaming state.md to indicate exit reason diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 948612e1..7a5fdec1 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -6,7 +6,7 @@ # - cat/echo/printf > file.md (redirection) # - tee file.md # - sed -i file.md (in-place edit) -# - goal-tracker.md modifications after Round 0 +# - goal-tracker.md modifications via Bash # - PR loop state.md modifications # - PR loop read-only file modifications (pr-comment, prompt, codex-prompt, etc.) # @@ -359,12 +359,11 @@ fi # Round > 0: prompt to put request in summary if command_modifies_file "$COMMAND_LOWER" "goal-tracker\.md"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" if [[ "$CURRENT_ROUND" -eq 0 ]]; then - GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" goal_tracker_bash_blocked_message "$GOAL_TRACKER_PATH" >&2 else - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 fi exit 2 fi @@ -390,6 +389,23 @@ if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-summary\.md"; then exit 2 fi +# ======================================== +# Block Round Contract File Modifications (All Rounds) +# ======================================== +# Round contracts should be written using Write or Edit tools so round scoping +# stays aligned with the current loop state. 
+ +if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-contract\.md"; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-contract.md" + FALLBACK="# Round Contract Bash Write Blocked + +Do not use Bash commands to modify round contract files. +Use the Write or Edit tool instead: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/round-contract-bash-write.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 +fi + # ======================================== # Block Todos File Modifications (All Rounds) # ======================================== diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 25142818..95783918 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -148,6 +148,9 @@ fi if [[ "$BITLESSON_ALLOW_EMPTY_NONE" != "true" && "$BITLESSON_ALLOW_EMPTY_NONE" != "false" ]]; then BITLESSON_ALLOW_EMPTY_NONE="true" fi +MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" +LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" +DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" # Re-validate Codex Model and Effort for YAML safety (in case state.md was manually edited) # Use same validation patterns as setup-rlcr-loop.sh if [[ ! "$CODEX_EXEC_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then @@ -189,6 +192,13 @@ if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then MAX_ITERATIONS=42 fi +if [[ ! 
"$MAINLINE_STALL_COUNT" =~ ^[0-9]+$ ]]; then + echo "Warning: Invalid mainline_stall_count '$MAINLINE_STALL_COUNT', defaulting to 0" >&2 + MAINLINE_STALL_COUNT=0 +fi +LAST_MAINLINE_VERDICT=$(normalize_mainline_progress_verdict "$LAST_MAINLINE_VERDICT") +DRIFT_STATUS=$(normalize_drift_status "$DRIFT_STATUS") + # ======================================== # Quick-check 0: Schema Validation (v1.1.2+ fields) # ======================================== @@ -682,8 +692,10 @@ fi # In Finalize Phase, expect finalize-summary.md instead of round-N-summary.md if [[ "$IS_FINALIZE_PHASE" == "true" ]]; then SUMMARY_FILE="$LOOP_DIR/finalize-summary.md" + ROUND_CONTRACT_FILE="" else SUMMARY_FILE="$LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + ROUND_CONTRACT_FILE="$LOOP_DIR/round-${CURRENT_ROUND}-contract.md" fi if [[ ! -f "$SUMMARY_FILE" ]]; then @@ -713,6 +725,36 @@ Please write your work summary to: {{SUMMARY_FILE}}" exit 0 fi +# Check Round Contract Exists +# ======================================== + +if [[ "$IS_FINALIZE_PHASE" != "true" ]]; then + if [[ ! 
-f "$ROUND_CONTRACT_FILE" ]]; then + FALLBACK="# Round Contract Missing + +Before trying to exit, write the current round contract to: {{ROUND_CONTRACT_FILE}} + +The round contract must restate: +- The single mainline objective for this round +- The target ACs +- Which side issues are truly blocking +- Which side issues are queued and out of scope +- The success criteria for this round" + REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/round-contract-missing.md" "$FALLBACK" \ + "ROUND_CONTRACT_FILE=$ROUND_CONTRACT_FILE") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Round contract missing for round $CURRENT_ROUND" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi +fi + # ======================================== # Check BitLesson Delta Section (all non-finalize rounds) # ======================================== @@ -742,7 +784,7 @@ GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" # Skip this check in Finalize Phase, Review Phase, or when review_started is already true (skip-impl mode) # - Finalize Phase: goal tracker was already initialized before COMPLETE -# - Review Phase (review_started=true): skip-impl mode skips implementation, no goal tracker needed +# - Review Phase: later rounds may update only the mutable section, so Round 0 placeholder checks no longer apply if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ "$REVIEW_STARTED" != "true" ]] && [[ "$CURRENT_ROUND" -eq 0 ]] && [[ -f "$GOAL_TRACKER_FILE" ]]; then # Check if goal-tracker.md still contains placeholder text # Extract each section and check for generic placeholder pattern within that section @@ -1235,6 +1277,79 @@ Follow the plan's per-task routing tags strictly: ROUTING_EOF } +# Stop the loop when mainline progress has stalled for too many consecutive rounds. 
+# Arguments: $1=stall_count, $2=last_verdict +stop_for_mainline_drift() { + local stall_count="$1" + local last_verdict="$2" + + upsert_state_fields "$STATE_FILE" \ + "${FIELD_MAINLINE_STALL_COUNT}=${stall_count}" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${last_verdict}" \ + "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_REPLAN_REQUIRED}" + + local fallback="# Mainline Drift Circuit Breaker + +The RLCR loop has been stopped because the mainline failed to advance for {{STALL_COUNT}} consecutive implementation rounds. + +- Last mainline verdict: {{LAST_VERDICT}} +- Drift status: replan_required + +This loop should not continue automatically. Revisit the original plan, recover the round contract, and restart with a narrower mainline objective." + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-drift-stop.md" "$fallback" \ + "STALL_COUNT=$stall_count" \ + "LAST_VERDICT=$last_verdict" \ + "PLAN_FILE=$PLAN_FILE") + + end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP" + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Stopped - mainline drift circuit breaker triggered" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + +# Block exit when implementation review output omits the required mainline verdict. +# Arguments: $1=review_result_file, $2=review_prompt_file +block_missing_mainline_verdict() { + local review_result_file="$1" + local review_prompt_file="$2" + + local fallback="# Mainline Verdict Missing + +The implementation review output is missing the required line: + +\`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED\` + +Humanize cannot safely update drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. 
+ +Files: +- Review result: {{REVIEW_RESULT_FILE}} +- Review prompt: {{REVIEW_PROMPT_FILE}}" + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-verdict-missing.md" "$fallback" \ + "REVIEW_RESULT_FILE=$review_result_file" \ + "REVIEW_PROMPT_FILE=$review_prompt_file") + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Blocked - implementation review missing Mainline Progress Verdict" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + # Continue review loop when issues are found # Arguments: $1=round_number, $2=review_content continue_review_loop_with_issues() { @@ -1273,6 +1388,7 @@ continue_review_loop_with_issues() { - Notes: [what changed and why] EOF fi + local next_contract_file="$LOOP_DIR/round-${round}-contract.md" local fallback="# Code Review Findings @@ -1284,14 +1400,35 @@ You are in the **Review Phase** of the RLCR loop. Codex has performed a code rev ## Instructions -1. Address all issues marked with [P0-9] severity markers -2. Focus on fixes only - do not add new features -3. Commit your changes after fixing the issues -4. Write your summary to: {{SUMMARY_FILE}}" +1. Re-anchor on the original plan and current goal tracker before changing code +2. Refresh the round contract at {{ROUND_CONTRACT_FILE}} +3. Address only the issues that are truly blocking the current mainline objective or code-review acceptance +4. Record non-blocking follow-up items as queued, not as the main goal +5. Commit your changes after fixing the issues +6. 
Write your summary to: {{SUMMARY_FILE}}" load_and_render_safe "$TEMPLATE_DIR" "claude/review-phase-prompt.md" "$fallback" \ "REVIEW_CONTENT=$review_content" \ - "SUMMARY_FILE=$next_summary_file" > "$next_prompt_file" + "SUMMARY_FILE=$next_summary_file" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "PLAN_FILE=$PLAN_FILE" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "ROUND_CONTRACT_FILE=$next_contract_file" \ + "CURRENT_ROUND=$round" > "$next_prompt_file" + if [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$next_prompt_file"; then + cat >> "$next_prompt_file" << EOF + +## BitLesson Selection (REQUIRED FOR EACH FIX TASK) + +Before implementing each fix task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each fix task/sub-task to select relevant lesson IDs +3. Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF + fi append_task_tag_routing_note "$next_prompt_file" jq -n \ @@ -1536,6 +1673,53 @@ REVIEW_CONTENT=$(cat "$REVIEW_RESULT_FILE") LAST_LINE=$(echo "$REVIEW_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') +NEXT_MAINLINE_STALL_COUNT="$MAINLINE_STALL_COUNT" +NEXT_LAST_MAINLINE_VERDICT="$LAST_MAINLINE_VERDICT" +NEXT_DRIFT_STATUS="$DRIFT_STATUS" +DRIFT_REPLAN_REQUIRED=false +MAINLINE_DRIFT_STOP=false + +if [[ "$REVIEW_STARTED" != "true" ]]; then + EXTRACTED_MAINLINE_VERDICT=$(extract_mainline_progress_verdict "$REVIEW_CONTENT") + + if [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$EXTRACTED_MAINLINE_VERDICT" == "$MAINLINE_VERDICT_UNKNOWN" ]]; then + echo "Implementation review output is missing Mainline Progress Verdict. Blocking exit for safety." 
>&2 + block_missing_mainline_verdict "$REVIEW_RESULT_FILE" "$REVIEW_PROMPT_FILE" + fi + + case "$EXTRACTED_MAINLINE_VERDICT" in + "$MAINLINE_VERDICT_ADVANCED") + NEXT_MAINLINE_STALL_COUNT=0 + NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED" + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + ;; + "$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED") + NEXT_MAINLINE_STALL_COUNT=$((MAINLINE_STALL_COUNT + 1)) + NEXT_LAST_MAINLINE_VERDICT="$EXTRACTED_MAINLINE_VERDICT" + if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 2 ]]; then + NEXT_DRIFT_STATUS="$DRIFT_STATUS_REPLAN_REQUIRED" + DRIFT_REPLAN_REQUIRED=true + else + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + fi + if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 3 ]]; then + MAINLINE_DRIFT_STOP=true + fi + ;; + *) + : + ;; + esac + + if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then + NEXT_MAINLINE_STALL_COUNT=0 + NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED" + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + DRIFT_REPLAN_REQUIRED=false + MAINLINE_DRIFT_STOP=false + fi +fi + # Handle COMPLETE - enter Review Phase or Finalize Phase if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then # In review phase, COMPLETE signal is ignored - only absence of [P0-9] triggers finalize @@ -1563,10 +1747,12 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then else echo "Implementation complete. Entering Review Phase..." >&2 - # Update state to indicate review phase has started - TEMP_FILE="${STATE_FILE}.tmp.$$" - sed "s/^review_started: .*/review_started: true/" "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" + # Update state to indicate review phase has started and clear drift counters. 
+ upsert_state_fields "$STATE_FILE" \ + "${FIELD_REVIEW_STARTED}=true" \ + "${FIELD_MAINLINE_STALL_COUNT}=0" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${MAINLINE_VERDICT_ADVANCED}" \ + "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_NORMAL}" REVIEW_STARTED="true" # Create marker file to validate review phase was properly entered @@ -1614,6 +1800,11 @@ Use \`/humanize:cancel-rlcr-loop\` to end this loop." run_and_handle_code_review "$((CURRENT_ROUND + 1))" "Loop: Finalize Phase - Code review passed" fi +if [[ "$MAINLINE_DRIFT_STOP" == "true" ]] && [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$LAST_LINE_TRIMMED" != "$MARKER_COMPLETE" ]]; then + echo "Mainline progress stalled for $NEXT_MAINLINE_STALL_COUNT consecutive rounds. Triggering drift circuit breaker." >&2 + stop_for_mainline_drift "$NEXT_MAINLINE_STALL_COUNT" "$NEXT_LAST_MAINLINE_VERDICT" +fi + # Handle STOP - circuit breaker triggered if [[ "$LAST_LINE_TRIMMED" == "$MARKER_STOP" ]]; then echo "" >&2 @@ -1649,9 +1840,11 @@ fi # ======================================== # Update state file for next round -TEMP_FILE="${STATE_FILE}.tmp.$$" -sed "s/^current_round: .*/current_round: $NEXT_ROUND/" "$STATE_FILE" > "$TEMP_FILE" -mv "$TEMP_FILE" "$STATE_FILE" +upsert_state_fields "$STATE_FILE" \ + "${FIELD_CURRENT_ROUND}=${NEXT_ROUND}" \ + "${FIELD_MAINLINE_STALL_COUNT}=${NEXT_MAINLINE_STALL_COUNT}" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${NEXT_LAST_MAINLINE_VERDICT}" \ + "${FIELD_DRIFT_STATUS}=${NEXT_DRIFT_STATUS}" # Create next round prompt NEXT_PROMPT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-prompt.md" @@ -1678,6 +1871,7 @@ if [[ ! 
-f "$NEXT_SUMMARY_FILE" ]]; then - Notes: [what changed and why] EOF fi +NEXT_CONTRACT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-contract.md" # Build the next round prompt from templates NEXT_ROUND_FALLBACK="# Next Round Instructions @@ -1692,12 +1886,60 @@ Before executing tasks in this round: ## Codex Review {{REVIEW_CONTENT}} -Reference: {{PLAN_FILE}}, {{GOAL_TRACKER_FILE}}, {{BITLESSON_FILE}}" -load_and_render_safe "$TEMPLATE_DIR" "claude/next-round-prompt.md" "$NEXT_ROUND_FALLBACK" \ - "PLAN_FILE=$PLAN_FILE" \ - "REVIEW_CONTENT=$REVIEW_CONTENT" \ - "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ - "BITLESSON_FILE=$BITLESSON_FILE" > "$NEXT_PROMPT_FILE" +Reference: {{PLAN_FILE}}, {{GOAL_TRACKER_FILE}}, {{ROUND_CONTRACT_FILE}}, {{BITLESSON_FILE}}" +DRIFT_REPLAN_FALLBACK="# Drift Recovery Required + +The mainline has not advanced for {{STALL_COUNT}} consecutive implementation rounds. + +Last mainline verdict: {{LAST_MAINLINE_VERDICT}} + +Before writing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the recent round summaries and review results +- Rewrite @{{ROUND_CONTRACT_FILE}} with a recovery-focused mainline objective + +Do not spend this round clearing queued work. Recover mainline progress first. 
+ +## Codex Review +{{REVIEW_CONTENT}}" + +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]]; then + load_and_render_safe "$TEMPLATE_DIR" "claude/drift-replan-prompt.md" "$DRIFT_REPLAN_FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "REVIEW_CONTENT=$REVIEW_CONTENT" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "ROUND_CONTRACT_FILE=$NEXT_CONTRACT_FILE" \ + "CURRENT_ROUND=$NEXT_ROUND" \ + "STALL_COUNT=$NEXT_MAINLINE_STALL_COUNT" \ + "LAST_MAINLINE_VERDICT=$NEXT_LAST_MAINLINE_VERDICT" > "$NEXT_PROMPT_FILE" +else + load_and_render_safe "$TEMPLATE_DIR" "claude/next-round-prompt.md" "$NEXT_ROUND_FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "REVIEW_CONTENT=$REVIEW_CONTENT" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "ROUND_CONTRACT_FILE=$NEXT_CONTRACT_FILE" \ + "CURRENT_ROUND=$NEXT_ROUND" \ + "STALL_COUNT=$NEXT_MAINLINE_STALL_COUNT" \ + "LAST_MAINLINE_VERDICT=$NEXT_LAST_MAINLINE_VERDICT" > "$NEXT_PROMPT_FILE" +fi + +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]] && [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$NEXT_PROMPT_FILE"; then + cat >> "$NEXT_PROMPT_FILE" << EOF + +## BitLesson Selection (REQUIRED FOR EACH TASK) + +Before executing each task or sub-task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each task/sub-task to select relevant lesson IDs +3. Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF +fi if [[ "$AGENT_TEAMS" == "true" ]]; then ENFORCEMENT_BLOCK="**Delegation Warning**: Do NOT implement code yourself in Agent Teams mode; delegate all coding tasks to team members." 
@@ -1814,6 +2056,9 @@ fi # Build system message SYSTEM_MSG="Loop: Round $NEXT_ROUND/$MAX_ITERATIONS - Codex found issues to address" +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]]; then + SYSTEM_MSG="Loop: Round $NEXT_ROUND/$MAX_ITERATIONS - Mainline drift detected, replan required" +fi # Block exit and send review feedback jq -n \ diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..7259dce8 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -6,7 +6,8 @@ # - Todos files (should use native Task tools instead) # - Prompt files (read-only, generated by Codex) # - State files (managed by hooks, not Claude) -# - Goal tracker after Round 0 +# - Wrong round number contract files +# - Goal tracker edits outside the active loop or that alter the immutable section # - PR loop state files (.humanize/pr-loop/) # - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # @@ -101,6 +102,10 @@ fi # Detect if we're in Finalize Phase (finalize-state.md exists) STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +IS_FINALIZE_PHASE=false +if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then + IS_FINALIZE_PHASE=true +fi # Parse state file using strict validation (fail closed on malformed state) if ! 
parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then @@ -124,6 +129,11 @@ if is_state_file_path "$FILE_PATH_LOWER"; then exit 2 fi +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then + finalize_contract_blocked_message "edit" >&2 + exit 2 +fi + # ======================================== # Block Plan Backup Edits # ======================================== @@ -139,20 +149,52 @@ if [[ "$FILENAME" == "plan.md" ]]; then fi # ======================================== -# Block Goal Tracker After Round 0 +# Validate Goal Tracker Edits # ======================================== -if is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$CURRENT_ROUND" -gt 0 ]]; then - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 - exit 2 +if is_goal_tracker_path "$FILE_PATH_LOWER"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if [[ "$CURRENT_ROUND" -gt 0 ]]; then + if ! echo "$HOOK_INPUT" | jq -e '.tool_input | has("old_string") and has("new_string")' >/dev/null 2>&1; then + echo "Error: Missing required field: tool_input.old_string or tool_input.new_string" >&2 + exit 1 + fi + OLD_STRING=$(echo "$HOOK_INPUT" | jq -r '.tool_input.old_string // ""') + if [[ -z "$OLD_STRING" ]]; then + echo "Error: Missing required field: tool_input.old_string" >&2 + exit 1 + fi + + NEW_STRING=$(echo "$HOOK_INPUT" | jq -r '.tool_input.new_string // ""') + REPLACE_ALL=$(echo "$HOOK_INPUT" | jq -r '.tool_input.replace_all // false') + + if ! 
UPDATED_CONTENT=$(preview_edit_result "$GOAL_TRACKER_PATH" "$OLD_STRING" "$NEW_STRING" "$REPLACE_ALL" 2>/dev/null); then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if ! goal_tracker_mutable_update_allowed "$GOAL_TRACKER_PATH" "$UPDATED_CONTENT"; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + fi + + exit 0 fi # ======================================== -# Validate Summary File Round Number +# Validate Summary/Contract File Round Number # ======================================== -if is_round_file_type "$FILE_PATH_LOWER" "summary"; then +if is_round_file_type "$FILE_PATH_LOWER" "summary" || is_round_file_type "$FILE_PATH_LOWER" "contract"; then # Extract filename from path (portable - works in bash and zsh) CLAUDE_FILENAME=$(echo "$FILE_PATH" | sed -n 's|.*\.humanize/rlcr/[^/]*/\(.*\)$|\1|p') if [[ -z "$CLAUDE_FILENAME" ]]; then @@ -161,9 +203,10 @@ if is_round_file_type "$FILE_PATH_LOWER" "summary"; then if [[ -n "$CLAUDE_FILENAME" ]]; then CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME") + FILE_TYPE=$([[ "$FILE_PATH_LOWER" == *"-contract.md" ]] && echo "contract" || echo "summary") if [[ -n "$CLAUDE_ROUND" ]] && [[ "$CLAUDE_ROUND" != "$CURRENT_ROUND" ]] && ! is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md" FALLBACK="# Wrong Round Number You tried to {{ACTION}} round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**. 
@@ -172,7 +215,7 @@ Edit: {{CORRECT_PATH}}" load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-number.md" "$FALLBACK" \ "ACTION=edit" \ "CLAUDE_ROUND=$CLAUDE_ROUND" \ - "FILE_TYPE=summary" \ + "FILE_TYPE=$FILE_TYPE" \ "CURRENT_ROUND=$CURRENT_ROUND" \ "CORRECT_PATH=$CORRECT_PATH" >&2 exit 2 diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..02d15202 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -3,10 +3,11 @@ # PreToolUse Hook: Validate Read access for RLCR loop and PR loop files # # Blocks Claude from reading: -# - Wrong round's prompt/summary files (outdated information) +# - Wrong round's prompt/summary/contract files (outdated information) # - Round files from wrong locations (not in .humanize/rlcr/) # - Round files from old session directories # - Todos files (should use native Task tools instead) +# - goal-tracker.md from old RLCR sessions # # PR loop files (.humanize/pr-loop/) are generally allowed to read # to give Claude access to comments, prompts, and feedback. @@ -66,15 +67,26 @@ if is_round_file_type "$FILE_PATH_LOWER" "todos"; then fi # ======================================== -# Check for Round Files (summary/prompt) +# Check for Restricted RLCR Files # ======================================== -if ! is_round_file_type "$FILE_PATH_LOWER" "summary" && ! is_round_file_type "$FILE_PATH_LOWER" "prompt"; then +IS_GOAL_TRACKER=$(is_goal_tracker_path "$FILE_PATH_LOWER" && echo "true" || echo "false") +IS_ROUND_FILE=$( + if is_round_file_type "$FILE_PATH_LOWER" "summary" || \ + is_round_file_type "$FILE_PATH_LOWER" "prompt" || \ + is_round_file_type "$FILE_PATH_LOWER" "contract"; then + echo "true" + else + echo "false" + fi +) + +IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") +if [[ "$IS_ROUND_FILE" != "true" ]] && ! 
{ [[ "$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; }; then exit 0 fi CLAUDE_FILENAME=$(basename "$FILE_PATH") -IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") # ======================================== # Find Active Loop and Current Round @@ -90,6 +102,10 @@ fi # Detect if we're in Finalize Phase (finalize-state.md exists) STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +IS_FINALIZE_PHASE=false +if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then + IS_FINALIZE_PHASE=true +fi # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then @@ -98,6 +114,35 @@ if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then + finalize_contract_blocked_message "read" >&2 + exit 2 +fi + +# ======================================== +# Validate Goal Tracker Path +# ======================================== + +if [[ "$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_CORRECT_PATH=$(_normalize_path "$CORRECT_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_CORRECT_PATH" ]]; then + FALLBACK="# Wrong Goal Tracker Path + +Read the active loop goal tracker instead: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-file-location.md" "$FALLBACK" \ + "FILE_PATH=$FILE_PATH" \ + "ACTIVE_LOOP_DIR=$ACTIVE_LOOP_DIR" \ + "CURRENT_ROUND=$CURRENT_ROUND" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 + fi + + exit 0 +fi + # ======================================== # Extract Round Number and File Type # ======================================== @@ -113,6 +158,8 @@ if is_round_file_type 
"$FILE_PATH_LOWER" "summary"; then FILE_TYPE="summary" elif is_round_file_type "$FILE_PATH_LOWER" "prompt"; then FILE_TYPE="prompt" +elif is_round_file_type "$FILE_PATH_LOWER" "contract"; then + FILE_TYPE="contract" fi # ======================================== diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..9c6bdc4b 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -6,8 +6,9 @@ # - Todos files (should use native Task tools instead) # - Prompt files (read-only, generated by Codex) # - Wrong round number summary files +# - Wrong round number contract files # - Summary files outside .humanize/rlcr/ -# - Goal tracker after Round 0 +# - Goal tracker writes outside the active loop or that alter the immutable section # - PR loop state files (.humanize/pr-loop/) # - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # @@ -101,19 +102,20 @@ fi # ======================================== IS_SUMMARY_FILE=$(is_round_file_type "$FILE_PATH_LOWER" "summary" && echo "true" || echo "false") +IS_CONTRACT_FILE=$(is_round_file_type "$FILE_PATH_LOWER" "contract" && echo "true" || echo "false") IS_FINALIZE_SUMMARY=$(is_finalize_summary_path "$FILE_PATH_LOWER" && echo "true" || echo "false") IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") -# If not a summary file, not a finalize summary, and not in .humanize/rlcr, allow normally -if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then +# If not a summary file, not a contract file, not a finalize summary, and not in .humanize/rlcr, allow normally +if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_CONTRACT_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then exit 0 fi # For state.md, finalize-state.md, goal-tracker.md, and plan.md in .humanize/rlcr, we need 
further validation -# For other files in .humanize/rlcr that aren't summaries, allow them +# For other files in .humanize/rlcr that aren't summaries/contracts, allow them FILENAME=$(basename "$FILE_PATH") IS_PLAN_BACKUP=$([[ "$FILENAME" == "plan.md" ]] && echo "true" || echo "false") -if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then +if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_CONTRACT_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then exit 0 fi @@ -174,6 +176,12 @@ if [[ "$IS_FINALIZE_SUMMARY" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true fi fi +# There is no active round contract once the loop has entered Finalize Phase. +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && [[ "$IS_CONTRACT_FILE" == "true" ]]; then + finalize_contract_blocked_message "write to" >&2 + exit 2 +fi + # ======================================== # Block Plan Backup Writes # ======================================== @@ -188,26 +196,54 @@ if [[ "$IS_PLAN_BACKUP" == "true" ]]; then fi # ======================================== -# Block Goal Tracker After Round 0 +# Validate Goal Tracker Writes # ======================================== -if is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$CURRENT_ROUND" -gt 0 ]]; then - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 - exit 2 +if is_goal_tracker_path "$FILE_PATH_LOWER"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + 
goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if [[ "$CURRENT_ROUND" -gt 0 ]]; then + if ! require_tool_input_field "$HOOK_INPUT" "content"; then + exit 1 + fi + + UPDATED_CONTENT=$(echo "$HOOK_INPUT" | jq -r '.tool_input.content // ""') + if ! goal_tracker_mutable_update_allowed "$GOAL_TRACKER_PATH" "$UPDATED_CONTENT"; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + fi + + exit 0 fi # ======================================== -# Block Summary Files Outside .humanize/rlcr +# Block Summary/Contract Files Outside .humanize/rlcr # ======================================== -if [[ "$IS_SUMMARY_FILE" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - FALLBACK="# Wrong Summary Location +if [[ "$IS_SUMMARY_FILE" == "true" || "$IS_CONTRACT_FILE" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then + if [[ "$IS_CONTRACT_FILE" == "true" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-contract.md" + FALLBACK="# Wrong Round Contract Location + +Write the round contract to the correct path: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-contract-location.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + else + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + FALLBACK="# Wrong Summary Location Write summary to the correct path: {{CORRECT_PATH}}" - load_and_render_safe "$TEMPLATE_DIR" "block/wrong-summary-location.md" "$FALLBACK" \ - "CORRECT_PATH=$CORRECT_PATH" >&2 + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-summary-location.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + fi exit 2 fi @@ -224,14 +260,15 @@ if [[ -z "$CLAUDE_FILENAME" ]]; then fi # ======================================== -# Validate Round Number (for summary files) +# Validate Round Number (for summary/contract files) # ======================================== 
-if [[ "$IS_SUMMARY_FILE" == "true" ]]; then +if [[ "$IS_SUMMARY_FILE" == "true" || "$IS_CONTRACT_FILE" == "true" ]]; then CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME") + FILE_TYPE=$([[ "$IS_CONTRACT_FILE" == "true" ]] && echo "contract" || echo "summary") if [[ -n "$CLAUDE_ROUND" ]] && [[ "$CLAUDE_ROUND" != "$CURRENT_ROUND" ]] && ! is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md" FALLBACK="# Wrong Round Number You tried to {{ACTION}} round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**. @@ -240,7 +277,7 @@ Write to: {{CORRECT_PATH}}" load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-number.md" "$FALLBACK" \ "ACTION=write to" \ "CLAUDE_ROUND=$CLAUDE_ROUND" \ - "FILE_TYPE=summary" \ + "FILE_TYPE=$FILE_TYPE" \ "CURRENT_ROUND=$CURRENT_ROUND" \ "CORRECT_PATH=$CORRECT_PATH" >&2 exit 2 diff --git a/prompt-template/block/finalize-contract-access.md b/prompt-template/block/finalize-contract-access.md new file mode 100644 index 00000000..7b757d9d --- /dev/null +++ b/prompt-template/block/finalize-contract-access.md @@ -0,0 +1,7 @@ +# Finalize Contract Access Blocked + +There is no active `round-N-contract.md` during the Finalize Phase. + +- Do not {{ACTION}} historical round contract files. +- Use `finalize-summary.md` for finalize-only notes. +- Use `goal-tracker.md` if you need the current mainline/backlog state. diff --git a/prompt-template/block/goal-tracker-modification.md b/prompt-template/block/goal-tracker-modification.md index f7f26384..30c77835 100644 --- a/prompt-template/block/goal-tracker-modification.md +++ b/prompt-template/block/goal-tracker-modification.md @@ -1,25 +1,14 @@ -# Goal Tracker Modification Blocked (Round {{CURRENT_ROUND}}) +# Goal Tracker Update Blocked (Round {{CURRENT_ROUND}}) -After Round 0, **only Codex can modify the Goal Tracker**. 
+After Round 0, you may update only the **MUTABLE SECTION** of the active goal tracker. -You CANNOT directly modify `goal-tracker.md` via Write, Edit, or Bash commands. +Use Write or Edit on: +`{{CORRECT_PATH}}` -## How to Request Changes +## Rules -Include a **"Goal Tracker Update Request"** section in your summary file: -`{{SUMMARY_FILE}}` +- Keep the **IMMUTABLE SECTION** unchanged +- Do not modify `goal-tracker.md` via Bash +- Do not write to an old loop session's tracker -Use this format: -```markdown -## Goal Tracker Update Request - -### Requested Changes: -- [E.g., "Mark Task X as completed with evidence: tests pass"] -- [E.g., "Add to Open Issues: discovered Y needs addressing"] -- [E.g., "Plan Evolution: changed approach from A to B because..."] - -### Justification: -[Explain why these changes are needed and how they serve the Ultimate Goal] -``` - -Codex will review your request and update the Goal Tracker if the changes are justified. +If you need Codex to correct tracker drift that you could not safely resolve yourself, include an optional `Goal Tracker Update Request` in your summary. diff --git a/prompt-template/block/mainline-drift-stop.md b/prompt-template/block/mainline-drift-stop.md new file mode 100644 index 00000000..2b7cc53d --- /dev/null +++ b/prompt-template/block/mainline-drift-stop.md @@ -0,0 +1,14 @@ +# Mainline Drift Circuit Breaker + +The RLCR loop has been stopped because the implementation failed to advance the mainline for **{{STALL_COUNT}} consecutive rounds**. + +- Last mainline verdict: `{{LAST_VERDICT}}` +- Plan anchor: `{{PLAN_FILE}}` +- Drift status: `replan_required` + +This loop should not continue automatically. + +Next action: +1. Re-read the original plan +2. Identify why recent rounds kept stalling or regressing +3. 
Start a fresh RLCR loop with a narrower recovered mainline objective diff --git a/prompt-template/block/mainline-verdict-missing.md b/prompt-template/block/mainline-verdict-missing.md new file mode 100644 index 00000000..bf822e53 --- /dev/null +++ b/prompt-template/block/mainline-verdict-missing.md @@ -0,0 +1,13 @@ +# Mainline Verdict Missing + +The implementation review output is missing the required line: + +`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED` + +Humanize cannot safely update the drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. + +Files: +- Review result: {{REVIEW_RESULT_FILE}} +- Review prompt: {{REVIEW_PROMPT_FILE}} diff --git a/prompt-template/block/round-contract-bash-write.md b/prompt-template/block/round-contract-bash-write.md new file mode 100644 index 00000000..bc012ffd --- /dev/null +++ b/prompt-template/block/round-contract-bash-write.md @@ -0,0 +1,7 @@ +# Round Contract Bash Write Blocked + +Do not use Bash commands to modify round contract files. + +Use the `Write` or `Edit` tool instead: + +`{{CORRECT_PATH}}` diff --git a/prompt-template/block/round-contract-missing.md b/prompt-template/block/round-contract-missing.md new file mode 100644 index 00000000..35a822b1 --- /dev/null +++ b/prompt-template/block/round-contract-missing.md @@ -0,0 +1,13 @@ +# Round Contract Missing + +Before you try to exit this round, write the current round contract to: +`{{ROUND_CONTRACT_FILE}}` + +The round contract must restate: +- The single **mainline objective** for this round +- The target ACs +- Which issues are truly **blocking** +- Which issues are **queued** and out of scope +- The concrete success criteria for this round + +Do not continue without a round contract. The loop uses it to prevent goal drift. 
diff --git a/prompt-template/block/wrong-contract-location.md b/prompt-template/block/wrong-contract-location.md new file mode 100644 index 00000000..04060c5f --- /dev/null +++ b/prompt-template/block/wrong-contract-location.md @@ -0,0 +1,5 @@ +# Wrong Round Contract Location + +Round contract files MUST be in the active loop directory. + +**Correct path**: `{{CORRECT_PATH}}` diff --git a/prompt-template/claude/drift-replan-prompt.md b/prompt-template/claude/drift-replan-prompt.md new file mode 100644 index 00000000..a5970c59 --- /dev/null +++ b/prompt-template/claude/drift-replan-prompt.md @@ -0,0 +1,68 @@ +Your work is not finished. Read and execute the below with ultrathink. + +## Drift Recovery Mode + +Codex judged the recent implementation rounds as failing to advance the mainline. + +- Consecutive stalled/regressed rounds: {{STALL_COUNT}} +- Last mainline verdict: {{LAST_MAINLINE_VERDICT}} + +This round is a **drift recovery round**. Do not continue with normal issue-clearing behavior. + +## Original Implementation Plan + +**IMPORTANT**: Re-anchor on the original plan first: +@{{PLAN_FILE}} + +## Required Recovery Re-anchor + +Before changing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the recent round summaries and review results that led here +- Rewrite the round contract at @{{ROUND_CONTRACT_FILE}} + +Your recovery contract must contain: +- Exactly one recovered **mainline objective** +- The 1-2 target ACs that prove mainline progress this round +- The root cause of recent drift or stagnation +- Which issues are truly **blocking** the recovered mainline objective +- Which issues remain **queued** and explicitly out of scope +- Concrete success criteria that would change the verdict back to `ADVANCED` + +Do not start implementation until the recovery contract exists. 
+ +## Task Lane Rules + +Use the Task system (TaskCreate, TaskUpdate, TaskList) with one required tag per task: +- `[mainline]` for plan-derived work that directly advances the recovered objective +- `[blocking]` for issues that prevent the recovered mainline objective from succeeding safely +- `[queued]` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- This round must prove mainline movement, not just reduce noise +- `[blocking]` work is allowed only when it directly unblocks the recovered mainline objective +- `[queued]` work must stay documented but must NOT replace the recovered objective +- If a new issue does not block the recovered objective, tag it `[queued]` and keep moving on mainline work + +--- +Below is Codex's review result: + +{{REVIEW_CONTENT}} + +--- + +## Goal Tracker Reference + +Before starting work, **read and update** @{{GOAL_TRACKER_FILE}} as needed: +- Keep the immutable section unchanged +- Record the drift/stagnation cause in the mutable section if it changed planning +- Keep blocking vs queued issue classification accurate +- Ensure the tracker and contract now describe the same recovered mainline objective + +## Recovery Guardrails + +- Do not spend this round mostly on queued cleanup +- Do not broaden scope to compensate for previous stalls +- If the original approach was flawed, log the plan evolution explicitly instead of silently changing direction +- If you cannot produce a credible recovered mainline objective, say so in the summary with concrete blockers diff --git a/prompt-template/claude/finalize-phase-prompt.md b/prompt-template/claude/finalize-phase-prompt.md index 4d1c584b..2ee14176 100644 --- a/prompt-template/claude/finalize-phase-prompt.md +++ b/prompt-template/claude/finalize-phase-prompt.md @@ -40,9 +40,10 @@ The code-simplifier agent should focus on: ## Before Exiting -1. Complete all tasks (mark them as completed using TaskUpdate with status "completed") -2. 
Commit your changes with a descriptive message -3. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** +1. Complete all `[mainline]` and `[blocking]` tasks (mark them as completed using TaskUpdate with status "completed") +2. `[queued]` tasks may remain only if they are documented as non-blocking follow-up work +3. Commit your changes with a descriptive message +4. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** Your summary should include: - What simplifications were made diff --git a/prompt-template/claude/finalize-phase-skipped-prompt.md b/prompt-template/claude/finalize-phase-skipped-prompt.md index 654fabdb..5cb01c3c 100644 --- a/prompt-template/claude/finalize-phase-skipped-prompt.md +++ b/prompt-template/claude/finalize-phase-skipped-prompt.md @@ -39,9 +39,10 @@ These constraints are **non-negotiable**: ## Before Exiting -1. Complete all tasks (mark them as completed using TaskUpdate with status "completed") -2. Commit your changes with a descriptive message -3. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** +1. Complete all `[mainline]` and `[blocking]` tasks (mark them as completed using TaskUpdate with status "completed") +2. `[queued]` tasks may remain only if they are documented as non-blocking follow-up work +3. Commit your changes with a descriptive message +4. 
Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** Your summary should include: - What work was done diff --git a/prompt-template/claude/goal-tracker-update-request.md b/prompt-template/claude/goal-tracker-update-request.md index 4c00d483..b685fd51 100644 --- a/prompt-template/claude/goal-tracker-update-request.md +++ b/prompt-template/claude/goal-tracker-update-request.md @@ -1,11 +1,12 @@ -**If Goal Tracker needs updates**, include this section in your summary: +**Optional fallback**: if you could not safely update the mutable section of `goal-tracker.md` directly, include this section in your summary: ```markdown ## Goal Tracker Update Request ### Requested Changes: - [E.g., "Mark Task X as completed with evidence: tests pass"] -- [E.g., "Add to Open Issues: discovered Y needs addressing"] +- [E.g., "Add to Blocking Side Issues: bug Y blocks AC-2"] +- [E.g., "Add to Queued Side Issues: cleanup Z is non-blocking"] - [E.g., "Plan Evolution: changed approach from A to B because..."] - [E.g., "Defer Task Z because... (impact on AC: none/minimal)"] @@ -13,4 +14,4 @@ [Explain why these changes are needed and how they serve the Ultimate Goal] ``` -Codex will review your request and update the Goal Tracker if justified. +Codex will review your request and reconcile the Goal Tracker if justified. diff --git a/prompt-template/claude/next-round-prompt.md b/prompt-template/claude/next-round-prompt.md index b3aaff01..fd1b1cfe 100644 --- a/prompt-template/claude/next-round-prompt.md +++ b/prompt-template/claude/next-round-prompt.md @@ -9,8 +9,35 @@ This plan contains the full scope of work and requirements. Ensure your work ali --- -For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList) to track each item in order of importance. -You are strictly prohibited from only addressing the most important issues - you MUST create Tasks for ALL discovered issues and attempt to resolve each one. 
+## Round Re-anchor (REQUIRED FIRST STEP) + +Before writing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the most recent round summaries/reviews that led to this round +- Write the current round contract to @{{ROUND_CONTRACT_FILE}} + +Your round contract must contain: +- Exactly one **mainline objective** +- The 1-2 target ACs for this round +- Which issues are truly **blocking** that mainline objective +- Which issues are **queued** and explicitly out of scope +- Concrete success criteria for this round + +Do not start implementation until the round contract exists. + +## Task Lane Rules + +Use the Task system (TaskCreate, TaskUpdate, TaskList) with one required tag per task: +- `[mainline]` for plan-derived work that directly advances this round's objective +- `[blocking]` for issues that prevent the mainline objective from succeeding safely +- `[queued]` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- `[mainline]` work is the round's primary success condition +- `[blocking]` work is allowed only when it truly blocks the mainline objective +- `[queued]` work must be documented but must NOT replace the round objective +- If a new bug does not block the current objective, tag it `[queued]` and keep moving on mainline work Before executing each task in this round: 1. Read @{{BITLESSON_FILE}} @@ -24,13 +51,25 @@ Below is Codex's review result: --- -## Goal Tracker Reference (READ-ONLY after Round 0) +## Goal Tracker Reference Before starting work, **read** @{{GOAL_TRACKER_FILE}} to understand: - The Ultimate Goal and Acceptance Criteria you're working toward - Which tasks are Active, Completed, or Deferred +- Which side issues are blocking vs queued - Any Plan Evolution that has occurred -- Open Issues that need attention +- The latest side-issue state that needs attention + +**IMPORTANT**: Keep the mutable section of `goal-tracker.md` up to date during the round. +Do NOT change the immutable section after Round 0. 
+If you cannot safely reconcile the tracker yourself, include an optional "Goal Tracker Update Request" section in your summary (see below). + +## Mainline Guardrails -**IMPORTANT**: You CANNOT directly modify goal-tracker.md after Round 0. -If you need to update the Goal Tracker, include a "Goal Tracker Update Request" section in your summary (see below). +- Keep the mainline objective from @{{ROUND_CONTRACT_FILE}} stable for this round +- Do not let queued issues take over the round +- If Codex reported several findings, classify them into: + - mainline gaps + - blocking side issues + - queued side issues +- Only mainline gaps and blocking side issues should drive the next code changes diff --git a/prompt-template/claude/post-alignment-action-items.md b/prompt-template/claude/post-alignment-action-items.md index 28611ec0..c78e95d0 100644 --- a/prompt-template/claude/post-alignment-action-items.md +++ b/prompt-template/claude/post-alignment-action-items.md @@ -5,3 +5,4 @@ This round follows a Full Goal Alignment Check. Pay special attention to: - **Forgotten Items**: Codex may have identified tasks that were being ignored. Address them. - **AC Status**: If any Acceptance Criteria were marked NOT MET, prioritize work toward those. - **Deferred Items**: If any deferrals were flagged as unjustified, un-defer them now. +- **Queued Issues**: Keep non-blocking follow-up work queued unless it now clearly blocks mainline progress. diff --git a/prompt-template/claude/review-phase-prompt.md b/prompt-template/claude/review-phase-prompt.md index 158ca0f0..e180e418 100644 --- a/prompt-template/claude/review-phase-prompt.md +++ b/prompt-template/claude/review-phase-prompt.md @@ -2,14 +2,39 @@ You are in the **Review Phase**. Codex has performed a code review and found issues that need to be addressed. 
+## Required Re-anchor + +Before touching code: +- Re-read the original plan at @{{PLAN_FILE}} +- Re-read the goal tracker at @{{GOAL_TRACKER_FILE}} +- Refresh the current round contract at @{{ROUND_CONTRACT_FILE}} + +The round contract must preserve a single mainline objective. Code review findings do NOT automatically become the new round objective. + ## Review Results {{REVIEW_CONTENT}} +## Issue Classification + +Classify each review finding before acting on it: +- **blocking side issue**: prevents the current mainline objective from succeeding safely or prevents review acceptance +- **queued side issue**: valid follow-up, but does not block the current round objective + +Queued issues may be documented, but they must NOT take over the round. + +## Task Rules + +Every task must use one lane tag: +- `[blocking]` for review findings that must be fixed now +- `[queued]` for non-blocking follow-up work + +Do not create new `[mainline]` tasks in review phase unless the review proves the previous mainline objective was incomplete. + ## Instructions -1. **Read `.humanize/bitlesson.md` and run `bitlesson-selector`** for each fix task before coding -2. **Address all issues** marked with `[P0-9]` severity markers +1. **Refresh the round contract** at `{{ROUND_CONTRACT_FILE}}` +2. **Address blocking issues first** and keep the mainline objective stable 3. **Focus on fixes only** - do not add new features or make unrelated changes 4. **Commit your changes** after fixing the issues 5. **Write your summary** to: `{{SUMMARY_FILE}}` @@ -17,9 +42,13 @@ You are in the **Review Phase**. 
Codex has performed a code review and found iss ## Summary Template Your summary should include: -- Which issues were fixed -- How each issue was resolved +- The mainline objective for this round +- Which blocking issues were fixed +- Which issues were reclassified as queued follow-up +- How each fixed issue was resolved - Any issues that could not be resolved (with explanation) +- Confirmation that `goal-tracker.md` was updated if the blocking/queued issue lists changed +- A Goal Tracker Update Request only if tracker reconciliation still needs Codex help ## Important Notes diff --git a/prompt-template/codex/full-alignment-review.md b/prompt-template/codex/full-alignment-review.md index d8ced81b..02997dd8 100644 --- a/prompt-template/codex/full-alignment-review.md +++ b/prompt-template/codex/full-alignment-review.md @@ -47,16 +47,32 @@ Estimated remaining rounds: ? Critical blockers: [list if any] ``` -## Part 2: Implementation Review +## Part 2: Mainline Drift Audit (MANDATORY) + +Determine whether the recent rounds are still serving the original plan: +- Is the current round's mainline objective clear and singular? +- Has Claude been advancing mainline ACs, or mostly clearing side issues? +- Which findings are true **blocking side issues** versus merely **queued side issues**? + +Include a short drift summary: +``` +Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED +Blocking Side Issues: N +Queued Side Issues: N +``` + +The `Mainline Progress Verdict` line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. 
+ +## Part 3: Implementation Review - Conduct a deep critical review of the implementation - Verify Claude's claims match reality - Identify any gaps, bugs, or incomplete work - Reference @{{DOCS_PATH}} for design documents -## Part 3: {{GOAL_TRACKER_UPDATE_SECTION}} +## Part 4: {{GOAL_TRACKER_UPDATE_SECTION}} -## Part 4: Progress Stagnation Check (MANDATORY for Full Alignment Rounds) +## Part 5: Progress Stagnation Check (MANDATORY for Full Alignment Rounds) To implement the original plan at @{{PLAN_FILE}}, we have completed **{{COMPLETED_ITERATIONS}} iterations** (Round 0 to Round {{CURRENT_ROUND}}). @@ -83,10 +99,13 @@ The project's `.humanize/rlcr/{{LOOP_TIMESTAMP}}/` directory contains the histor **If development is stagnating**, write **STOP** (as a single word on its own line) as the last line of your review output @{{REVIEW_RESULT_FILE}} instead of COMPLETE. -## Part 5: Output Requirements +## Part 6: Output Requirements - If issues found OR any AC is NOT MET (including deferred ACs), write your findings to @{{REVIEW_RESULT_FILE}} -- Include specific action items for Claude to address +- Include specific action items for Claude to address, classified into: + - Mainline Gaps + - Blocking Side Issues + - Queued Side Issues -- **If development is stagnating** (see Part 4), write "STOP" as the last line +- **If development is stagnating** (see Part 5), write "STOP" as the last line - **CRITICAL**: Only write "COMPLETE" as the last line if ALL ACs from the original plan are FULLY MET with no deferrals - DEFERRED items are considered INCOMPLETE - do NOT output COMPLETE if any AC is deferred diff --git a/prompt-template/codex/goal-tracker-update-section.md b/prompt-template/codex/goal-tracker-update-section.md index 77cbedca..fb312db8 100644 --- a/prompt-template/codex/goal-tracker-update-section.md +++ b/prompt-template/codex/goal-tracker-update-section.md @@ -1,17 +1,18 @@ ## Goal Tracker Update Requests (YOUR RESPONSIBILITY) -**Important**: Claude cannot directly modify `goal-tracker.md` after Round 0. 
If Claude's summary contains a "Goal Tracker Update Request" section, YOU must: +Claude should normally keep the **mutable section** of `goal-tracker.md` up to date directly. If Claude's summary contains a "Goal Tracker Update Request" section, or if you detect tracker drift during review, YOU must: -1. **Evaluate the request**: Is the change justified? Does it serve the Ultimate Goal? -2. **If approved**: Update @{{GOAL_TRACKER_FILE}} yourself with the requested changes: +1. **Evaluate the tracker state**: Is the mutable section still aligned with the Ultimate Goal and current AC progress? +2. **If correction is needed**: Update @{{GOAL_TRACKER_FILE}} yourself with the requested changes: - Move tasks between Active/Completed/Deferred sections as appropriate - Add entries to "Plan Evolution Log" with round number and justification - - Add new issues to "Open Issues" if discovered + - Add new issues to "Blocking Side Issues" or "Queued Side Issues" as appropriate - **NEVER modify the IMMUTABLE SECTION** (Ultimate Goal and Acceptance Criteria) -3. **If rejected**: Include in your review why the request was rejected +3. 
**If you reject a requested tracker change**: Include in your review why it was rejected Common update requests you should handle: - Task completion: Move from "Active Tasks" to "Completed and Verified" -- New issues: Add to "Open Issues" table +- New blocking issues: Add to "Blocking Side Issues" +- New queued issues: Add to "Queued Side Issues" - Plan changes: Add to "Plan Evolution Log" with your assessment - Deferrals: Only allow with strong justification; add to "Explicitly Deferred" diff --git a/prompt-template/codex/regular-review.md b/prompt-template/codex/regular-review.md index 6d0a8671..7db26ea2 100644 --- a/prompt-template/codex/regular-review.md +++ b/prompt-template/codex/regular-review.md @@ -44,11 +44,28 @@ Include a brief Goal Alignment Summary in your review: ACs: X/Y addressed | Forgotten items: N | Unjustified deferrals: N ``` -## Part 3: {{GOAL_TRACKER_UPDATE_SECTION}} +## Part 3: Required Finding Classification -## Part 4: Output Requirements +You MUST classify your findings into these lanes: +- **Mainline Gaps**: plan-derived work or AC progress that is missing, incomplete, or regressing +- **Blocking Side Issues**: bugs or implementation issues that block the current mainline objective from succeeding safely +- **Queued Side Issues**: valid non-blocking follow-up issues that should be documented but must NOT take over the next round + +Also include a one-line verdict: +``` +Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED +``` + +This verdict line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. + +If Claude mostly worked on queued side issues and failed to advance the mainline, say so explicitly. 
+ +## Part 4: {{GOAL_TRACKER_UPDATE_SECTION}} + +## Part 5: Output Requirements - In short, your review comments can include: problems/findings/blockers; claims that don't match reality; implementation plans for deferred work (to be implemented now); implementation plans for unfinished work; goal alignment issues. +- Your output should be structured so Claude can tell which items are mainline gaps, blocking side issues, and queued side issues. - If after your investigation the actual situation does not match what Claude claims to have completed, or there is pending work to be done, output your review comments to @{{REVIEW_RESULT_FILE}}. - **CRITICAL**: Only output "COMPLETE" as the last line if ALL tasks from the original plan are FULLY completed with no deferrals - DEFERRED items are considered INCOMPLETE - do NOT output COMPLETE if any task is deferred diff --git a/scripts/humanize.sh b/scripts/humanize.sh index 1613dd62..a64c18ba 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -33,6 +33,39 @@ humanize_split_to_array() { fi } +# Parse issue breakdown from goal-tracker.md +# Returns: blocking_issues|queued_issues|open_issues +humanize_parse_goal_tracker_issue_counts() { + local tracker_file="$1" + if [[ ! -f "$tracker_file" ]]; then + echo "0|0|0" + return + fi + + _count_table_data_rows() { + local row_count + row_count=$(sed -n "/$1/,/$2/p" "$tracker_file" | grep -cE '^\|' || true) + row_count=${row_count:-0} + echo $((row_count > 2 ? row_count - 2 : 0)) + } + + local blocking_issues + local queued_issues + local open_issues + + blocking_issues=$(_count_table_data_rows '### Blocking Side Issues' '^###') + queued_issues=$(_count_table_data_rows '### Queued Side Issues' '^###') + open_issues=$((blocking_issues + queued_issues)) + + # Legacy schema only had Open Issues; treat them as blocking for safety. 
+ if [[ "$open_issues" -eq 0 ]]; then + open_issues=$(_count_table_data_rows '### Open Issues' '^###') + blocking_issues="$open_issues" + fi + + echo "${blocking_issues}|${queued_issues}|${open_issues}" +} + # Parse goal-tracker.md and return summary values # Returns: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary humanize_parse_goal_tracker() { @@ -105,9 +138,10 @@ humanize_parse_goal_tracker() { local deferred_tasks deferred_tasks=$(_count_table_data_rows '### Explicitly Deferred' '^###') - # Count Open Issues - local open_issues - open_issues=$(_count_table_data_rows '### Open Issues' '^###') + # Count Open Issues (new schema prefers Blocking/Queued Side Issues; old schema used Open Issues) + local -a issue_parts + humanize_split_to_array issue_parts "$(humanize_parse_goal_tracker_issue_counts "$tracker_file")" + local open_issues="${issue_parts[2]}" # Extract Ultimate Goal summary (first content line after heading) local goal_summary @@ -364,8 +398,11 @@ _humanize_monitor_codex() { local review_started=$(grep -E "^review_started:" "$state_file" 2>/dev/null | sed 's/review_started: *//' | tr -d ' ') local agent_teams=$(grep -E "^agent_teams:" "$state_file" 2>/dev/null | sed 's/agent_teams: *//' | tr -d ' ') local push_every_round=$(grep -E "^push_every_round:" "$state_file" 2>/dev/null | sed 's/push_every_round: *//' | tr -d ' ') + local mainline_stall_count=$(grep -E "^mainline_stall_count:" "$state_file" 2>/dev/null | sed 's/mainline_stall_count: *//' | tr -d ' ') + local last_mainline_verdict=$(grep -E "^last_mainline_verdict:" "$state_file" 2>/dev/null | sed 's/last_mainline_verdict: *//' | tr -d ' ') + local drift_status=$(grep -E "^drift_status:" "$state_file" 2>/dev/null | sed 's/drift_status: *//' | tr -d ' ') - echo 
"${current_round:-N/A}|${max_iterations:-N/A}|${full_review_round:-N/A}|${codex_model:-N/A}|${codex_effort:-N/A}|${started_at:-N/A}|${plan_file:-N/A}|${ask_codex_question:-false}|${review_started:-false}|${agent_teams:-}|${push_every_round:-}" + echo "${current_round:-N/A}|${max_iterations:-N/A}|${full_review_round:-N/A}|${codex_model:-N/A}|${codex_effort:-N/A}|${started_at:-N/A}|${plan_file:-N/A}|${ask_codex_question:-false}|${review_started:-false}|${agent_teams:-}|${push_every_round:-}|${mainline_stall_count:-0}|${last_mainline_verdict:-unknown}|${drift_status:-normal}" } # Internal wrappers that call top-level functions @@ -405,6 +442,9 @@ _humanize_monitor_codex() { local review_started="${state_parts[8]:-false}" local agent_teams="${state_parts[9]:-}" local push_every_round="${state_parts[10]:-}" + local mainline_stall_count="${state_parts[11]:-0}" + local last_mainline_verdict="${state_parts[12]:-unknown}" + local drift_status="${state_parts[13]:-normal}" # Parse goal-tracker.md local -a goal_parts @@ -416,6 +456,10 @@ _humanize_monitor_codex() { local deferred_tasks="${goal_parts[4]}" local open_issues="${goal_parts[5]}" local goal_summary="${goal_parts[6]}" + local -a issue_parts + _split_to_array issue_parts "$(humanize_parse_goal_tracker_issue_counts "$goal_tracker_file")" + local blocking_issues="${issue_parts[0]}" + local queued_issues="${issue_parts[1]}" # Parse git status local -a git_parts @@ -548,18 +592,35 @@ _humanize_monitor_codex() { fi team_mode_segment=" | Team Mode: ${team_color}${team_display}${reset}" fi - printf "${magenta}Status:${reset} ${status_line} | Codex Ask Question: ${ask_q_color}${ask_q_display}${reset}${team_mode_segment}${clr_eol}\n" + local drift_segment="" + local drift_color="${dim}" + if [[ "$drift_status" == "replan_required" ]]; then + drift_color="${red}" + elif [[ "${mainline_stall_count:-0}" -gt 0 ]]; then + drift_color="${yellow}" + fi + if [[ -n "$drift_status" ]]; then + drift_segment=" | Drift: 
${drift_color}${drift_status}${reset} (${mainline_stall_count}, ${last_mainline_verdict})" + fi + printf "${magenta}Status:${reset} ${status_line} | Codex Ask Question: ${ask_q_color}${ask_q_display}${reset}${team_mode_segment}${drift_segment}${clr_eol}\n" # Progress line (color based on completion status) local ac_color="${green}" [[ "$completed_acs" -lt "$total_acs" ]] && ac_color="${yellow}" - local issue_color="${dim}" - [[ "$open_issues" -gt 0 ]] && issue_color="${red}" + local issue_total_color="${dim}" + [[ "$queued_issues" -gt 0 ]] && issue_total_color="${yellow}" + [[ "$blocking_issues" -gt 0 ]] && issue_total_color="${red}" # Use magenta for Progress and Git labels (status/data lines) printf "${magenta}Progress:${reset} ${ac_color}ACs: ${completed_acs}/${total_acs}${reset} Tasks: ${active_tasks} active, ${completed_tasks} done" [[ "$deferred_tasks" -gt 0 ]] && printf " ${yellow}${deferred_tasks} deferred${reset}" - [[ "$open_issues" -gt 0 ]] && printf " ${issue_color}Issues: ${open_issues}${reset}" + if [[ "$open_issues" -gt 0 ]]; then + printf " ${issue_total_color}Issues: ${open_issues}${reset}" + [[ "$blocking_issues" -gt 0 ]] && printf " (${red}%s blocking${reset}" "$blocking_issues" + [[ "$queued_issues" -gt 0 ]] && printf "%s${yellow}%s queued${reset}" \ + "$([[ "$blocking_issues" -gt 0 ]] && echo ", " || echo "(")" "$queued_issues" + printf ")" + fi printf "${clr_eol}\n" # Git status line (same color as Progress) diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..a6e894ef 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -384,6 +384,41 @@ get_pr_loop_phase_display() { # Goal Tracker Parsing # ======================================== +# Parse issue breakdown from goal-tracker.md +# Returns: blocking_issues|queued_issues|open_issues +# Usage: parse_goal_tracker_issue_counts "/path/to/goal-tracker.md" +parse_goal_tracker_issue_counts() { + local tracker_file="$1" + if [[ ! 
-f "$tracker_file" ]]; then + echo "0|0|0" + return + fi + + _count_table_rows() { + local start_pattern="$1" + local end_pattern="$2" + local row_count + row_count=$(sed -n "/${start_pattern}/,/${end_pattern}/p" "$tracker_file" | grep -cE '^\|' || true) + row_count=${row_count:-0} + echo $((row_count > 2 ? row_count - 2 : 0)) + } + + local blocking_issues + local queued_issues + local open_issues + + blocking_issues=$(_count_table_rows '### Blocking Side Issues' '^###') + queued_issues=$(_count_table_rows '### Queued Side Issues' '^###') + open_issues=$((blocking_issues + queued_issues)) + + if [[ "$open_issues" -eq 0 ]]; then + open_issues=$(_count_table_rows '### Open Issues' '^###') + blocking_issues="$open_issues" + fi + + echo "${blocking_issues}|${queued_issues}|${open_issues}" +} + # Parse goal-tracker.md and return summary values # Returns: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary # Usage: parse_goal_tracker "/path/to/goal-tracker.md" @@ -448,9 +483,19 @@ parse_goal_tracker() { local deferred_tasks deferred_tasks=$(_count_table_rows '### Explicitly Deferred' '^###') - # Count Open Issues + # Count Open Issues (new schema prefers Blocking/Queued Side Issues; old schema used Open Issues) + local issue_parts_raw local open_issues - open_issues=$(_count_table_rows '### Open Issues' '^###') + issue_parts_raw=$(parse_goal_tracker_issue_counts "$tracker_file") + if [[ -n "${ZSH_VERSION:-}" ]]; then + local -a issue_parts + issue_parts=("${(@s:|:)issue_parts_raw}") + open_issues="${issue_parts[3]}" + else + local -a issue_parts + IFS='|' read -r -a issue_parts <<< "$issue_parts_raw" + open_issues="${issue_parts[2]}" + fi # Extract Ultimate Goal summary local goal_summary diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 20c1f32a..c5c079d4 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -48,10 +48,45 @@ BASE_BRANCH="" 
FULL_REVIEW_ROUND="$DEFAULT_FULL_REVIEW_ROUND" SKIP_IMPL="false" SKIP_IMPL_NO_PLAN="false" +SKIP_IMPL_PLAN_ANCHORED="false" ASK_CODEX_QUESTION="true" AGENT_TEAMS="false" BITLESSON_ALLOW_EMPTY_NONE="true" +extract_plan_goal_content() { + local plan_path="$1" + local goal_section="" + + goal_section=$({ sed -n '/^##[[:space:]]*[Gg]oal\|^##[[:space:]]*[Oo]bjective\|^##[[:space:]]*[Pp]urpose/,/^##/p' "$plan_path" 2>/dev/null || true; } | head -20 | tail -n +2 | head -10) + if [[ -n "$goal_section" ]]; then + printf '%s\n' "$goal_section" + return + fi + + awk ' + /^[[:space:]]*#/ { next } + /^[[:space:]]*$/ { + if (started) { + exit + } + next + } + { + print + started=1 + lines++ + if (lines >= 5) { + exit + } + } + ' "$plan_path" +} + +extract_plan_ac_content() { + local plan_path="$1" + { sed -n '/^##[[:space:]]*[Aa]cceptance\|^##[[:space:]]*[Cc]riteria\|^##[[:space:]]*[Rr]equirements/,/^##/p' "$plan_path" 2>/dev/null || true; } | head -30 | tail -n +2 | head -25 +} + show_help() { cat < "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + PLAN_GOAL_CONTENT=$(extract_plan_goal_content "$FULL_PLAN_PATH") + PLAN_AC_CONTENT=$(extract_plan_ac_content "$FULL_PLAN_PATH") + + if [[ -z "$PLAN_GOAL_CONTENT" ]]; then + PLAN_GOAL_CONTENT="Preserve the original plan scope from $PLAN_FILE while resolving code review findings on the current branch." + fi + + if [[ -z "$PLAN_AC_CONTENT" ]]; then + PLAN_AC_CONTENT=$(cat < "$GOAL_TRACKER_FILE" << EOF +# Goal Tracker (Skip Implementation Mode with Plan Anchor) + +This RLCR loop was started with \`--skip-impl\` flag. The implementation phase was skipped, +but an explicit plan was provided and remains the scope anchor for review-only work. + +This tracker is still used to keep the review loop aligned around one mainline objective +and to separate blocking issues from queued follow-up work. 
+ +## IMMUTABLE SECTION + +### Ultimate Goal + +$PLAN_GOAL_CONTENT + +### Acceptance Criteria + +$PLAN_AC_CONTENT + +--- + +## MUTABLE SECTION + +### Plan Version: Review-Only (Updated: Round 0) + +#### Plan Evolution Log +| Round | Change | Reason | Impact on AC | +|-------|--------|--------|--------------| +| 0 | Skip implementation mode initialized around explicit plan anchor | Loop started with \`--skip-impl\` and retained @$PLAN_FILE as scope anchor | Review stays aligned with original plan | + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Preserve original plan alignment while resolving blocking review findings | Plan ACs in scope | pending | Review-only mode with explicit plan anchor | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| + +### Completed and Verified +| AC | Task | Completed Round | Verified Round | Evidence | +|----|------|-----------------|----------------|----------| + +### Explicitly Deferred +| Task | Original AC | Deferred Since | Justification | When to Reconsider | +|------|-------------|----------------|---------------|-------------------| + +EOF + else + # Create review-only goal tracker for skip-impl mode without a plan (no placeholder text) + cat > "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' # Goal Tracker (Skip Implementation Mode) This RLCR loop was started with `--skip-impl` flag. The implementation phase was skipped, and the loop is running in code review mode only. -## Mode: Code Review Only +This tracker is still used to keep the review loop aligned around one mainline objective +and to separate blocking issues from queued follow-up work. 
+ +## IMMUTABLE SECTION + +### Ultimate Goal + +Pass code review for the current branch without regressing existing behavior. + +### Acceptance Criteria + +- AC-1: All blocking `[P0-9]` code review findings are resolved. +- AC-2: Non-blocking follow-up items are explicitly queued and do not block completion. +- AC-3: Finalize phase can complete without introducing new review regressions. + +--- -The goal tracker is not used in skip-impl mode because: -- There is no implementation plan to track -- The loop focuses solely on code review quality -- No acceptance criteria tracking is needed +## MUTABLE SECTION -## What This Loop Does +### Plan Version: Review-Only (Updated: Round 0) -1. Runs `codex review` on changes between base branch and current branch -2. If issues are found, Claude fixes them iteratively -3. When no issues remain, enters finalize phase for code simplification +#### Plan Evolution Log +| Round | Change | Reason | Impact on AC | +|-------|--------|--------|--------------| +| 0 | Skip implementation mode initialized | Loop started with `--skip-impl` | Focus on review-only objective | + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Pass code review for current branch | AC-1 | pending | Review-only mode | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| + +### Completed and Verified +| AC | Task | Completed Round | Verified Round | Evidence | +|----|------|-----------------|----------------|----------| + +### Explicitly Deferred +| Task | Original AC | Deferred Since | Justification | When to Reconsider | +|------|-------------|----------------|---------------|-------------------| GOAL_TRACKER_EOF + fi else # Normal mode: create 
full goal tracker @@ -935,11 +1082,8 @@ GOAL_TRACKER_EOF # Extract goal from plan file (look for ## Goal, ## Objective, or first paragraph) # This is a heuristic - Claude will refine it in round 0 # Use ^## without leading whitespace - markdown headers should start at column 0 -GOAL_LINE=$(grep -i -m1 '^##[[:space:]]*\(goal\|objective\|purpose\)' "$FULL_PLAN_PATH" 2>/dev/null || echo "") -if [[ -n "$GOAL_LINE" ]]; then - # Get the content after the heading - # Use || true after sed to ignore SIGPIPE when head closes the pipe early (pipefail mode) - GOAL_SECTION=$({ sed -n '/^##[[:space:]]*[Gg]oal\|^##[[:space:]]*[Oo]bjective\|^##[[:space:]]*[Pp]urpose/,/^##/p' "$FULL_PLAN_PATH" || true; } | head -20 | tail -n +2 | head -10) +GOAL_SECTION=$(extract_plan_goal_content "$FULL_PLAN_PATH") +if [[ -n "$GOAL_SECTION" ]]; then echo "$GOAL_SECTION" >> "$GOAL_TRACKER_FILE" else # Use first non-empty, non-heading paragraph as goal description @@ -959,7 +1103,7 @@ GOAL_TRACKER_EOF # Extract acceptance criteria from plan file (look for ## Acceptance, ## Criteria, ## Requirements) # Use ^## without leading whitespace - markdown headers should start at column 0 # Use || true after sed to ignore SIGPIPE when head closes the pipe early (pipefail mode) -AC_SECTION=$({ sed -n '/^##[[:space:]]*[Aa]cceptance\|^##[[:space:]]*[Cc]riteria\|^##[[:space:]]*[Rr]equirements/,/^##/p' "$FULL_PLAN_PATH" 2>/dev/null || true; } | head -30 | tail -n +2 | head -25) +AC_SECTION=$(extract_plan_ac_content "$FULL_PLAN_PATH") if [[ -n "$AC_SECTION" ]]; then echo "$AC_SECTION" >> "$GOAL_TRACKER_FILE" else @@ -982,10 +1126,20 @@ cat >> "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' | 0 | Initial plan | - | - | #### Active Tasks - + | Task | Target AC | Status | Tag | Owner | Notes | |------|-----------|--------|-----|-------|-------| -| [To be populated by Claude based on plan] | - | pending | coding or analyze | claude or codex | - | +| [To be populated by Claude based on plan] | - | pending | coding or 
analyze | claude or codex | mainline task only | + +### Blocking Side Issues + +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues + +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| ### Completed and Verified @@ -997,10 +1151,6 @@ cat >> "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' | Task | Original AC | Deferred Since | Justification | When to Reconsider | |------|-------------|----------------|---------------|-------------------| -### Open Issues - -| Issue | Discovered Round | Blocking AC | Resolution Path | -|-------|-----------------|-------------|-----------------| GOAL_TRACKER_EOF fi # End of skip-impl goal tracker handling @@ -1043,6 +1193,7 @@ SUMMARY_TMPL_EOF # ======================================== SUMMARY_PATH="$LOOP_DIR/round-0-summary.md" +ROUND_CONTRACT_PATH="$LOOP_DIR/round-0-contract.md" # Create the round-0 summary template with BitLesson Delta section if [[ "$SKIP_IMPL" != "true" ]]; then @@ -1050,6 +1201,28 @@ if [[ "$SKIP_IMPL" != "true" ]]; then fi if [[ "$SKIP_IMPL" == "true" ]]; then + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + cat > "$ROUND_CONTRACT_PATH" << EOF +# Round 0 Contract + +- Mainline Objective: Keep the current branch aligned with @$PLAN_FILE while resolving only review findings that block clean acceptance. +- Target ACs: The original plan acceptance criteria affected by the current branch changes. +- Blocking Side Issues In Scope: Any \`[P0-9]\` findings or regressions that block review acceptance or violate the original plan scope. +- Queued Side Issues Out of Scope: Non-blocking cleanup, follow-up refactors, or future improvements that do not block review acceptance or plan alignment. +- Success Criteria: Code review passes and the current branch still matches the original plan's intended scope. 
+EOF + else + cat > "$ROUND_CONTRACT_PATH" << 'ROUND_CONTRACT_EOF' +# Round 0 Contract + +- Mainline Objective: Run code review for the current branch and resolve only findings that block clean acceptance. +- Target ACs: AC-1, AC-2 +- Blocking Side Issues In Scope: Any `[P0-9]` findings from the active review cycle. +- Queued Side Issues Out of Scope: Non-blocking cleanup, follow-up refactors, or future improvements that do not block review acceptance. +- Success Criteria: Code review passes with no blocking findings, and any remaining non-blocking follow-up is explicitly queued. +ROUND_CONTRACT_EOF + fi + # Skip-impl mode: create a prompt for code review only cat > "$LOOP_DIR/round-0-prompt.md" << EOF # Skip Implementation Mode - Code Review Loop @@ -1066,6 +1239,11 @@ The loop will automatically run \`codex review\` on your changes when you try to If issues are found (marked with [P0-9] priority), you'll need to fix them before the loop ends. Do not try to execute anything to trigger the review - just stop and it will run automatically. +Before requesting review, read: +- @$PLAN_FILE +- @$GOAL_TRACKER_FILE +- @$ROUND_CONTRACT_PATH + ## Your Task 1. Review your current work @@ -1074,10 +1252,32 @@ Do not try to execute anything to trigger the review - just stop and it will run 4. Repeat until no issues remain 5. Enter finalize phase for code simplification -## Note +## Review Objective -Since this is skip-impl mode, there is no implementation plan to follow. -The goal tracker is not used - focus on fixing code review issues. 
+Use the round contract as the current anchor: +- Keep one stable mainline objective and do not let it drift +- Treat review findings as \`[blocking]\` only if they block review acceptance +- Record non-blocking follow-up as \`[queued]\` +- Do not let queued work take over the round + +EOF + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + cat >> "$LOOP_DIR/round-0-prompt.md" << EOF +- Keep review-only work aligned with the original plan at @$PLAN_FILE + +Implementation phase is skipped, but the original plan still defines the intended branch scope. + +EOF + else + cat >> "$LOOP_DIR/round-0-prompt.md" << 'EOF' +There is no explicit implementation plan for this loop, so the review-only contract is the primary anchor. + +EOF + fi + + cat >> "$LOOP_DIR/round-0-prompt.md" << EOF + +Keep @$ROUND_CONTRACT_PATH updated if the blocking/queued split changes materially during review iterations. When you're ready for review, write a brief summary of your changes and try to exit (do not try to execute anything, just stop). @@ -1098,8 +1298,21 @@ Before starting implementation, you MUST initialize the Goal Tracker: 1. Read @$GOAL_TRACKER_FILE 2. If the "Ultimate Goal" section says "[To be extracted...]", extract a clear goal statement from the plan 3. If the "Acceptance Criteria" section says "[To be defined...]", define 3-7 specific, testable criteria -4. Populate the "Active Tasks" table with tasks from the plan, mapping each to an AC and filling Tag/Owner -5. Write the updated goal-tracker.md +4. Populate the "Active Tasks" table with MAINLINE tasks from the plan, mapping each to an AC and filling Tag/Owner +5. Record any already-known side issues in either "Blocking Side Issues" or "Queued Side Issues" +6. Write the updated goal-tracker.md + +## Round Contract Setup (REQUIRED BEFORE CODING) + +Before starting implementation, create @$ROUND_CONTRACT_PATH with: + +1. **One mainline objective** for this round +2. **Target ACs** (1-2 ACs only) +3. 
**Blocking side issues in scope** for this round +4. **Queued side issues out of scope** for this round +5. **Round success criteria** + +Use this contract to keep the round focused. Do NOT let non-blocking bugs or cleanup work replace the mainline objective. **IMPORTANT**: The IMMUTABLE SECTION can only be modified in Round 0. After this round, it becomes read-only. @@ -1107,8 +1320,18 @@ Before starting implementation, you MUST initialize the Goal Tracker: ## Implementation Plan -For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList) to track each item in order of importance. -You are strictly prohibited from only addressing the most important issues - you MUST create Tasks for ALL discovered issues and attempt to resolve each one. +For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList). + +Every task MUST start with exactly one lane tag: +- \`[mainline]\` for plan-derived work that directly advances the round objective +- \`[blocking]\` for issues that prevent the mainline objective from succeeding safely +- \`[queued]\` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- \`[mainline]\` tasks are the primary success condition for the round +- \`[blocking]\` tasks may be resolved in the round only if they truly block mainline progress +- \`[queued]\` tasks must NOT become the round objective and do NOT need to be cleared before moving on +- If a new issue is not blocking the current objective, tag it \`[queued]\` and keep moving on the mainline ## Task Tag Routing (MUST FOLLOW) @@ -1177,18 +1400,24 @@ cat >> "$LOOP_DIR/round-0-prompt.md" << EOF Throughout your work, you MUST maintain the Goal Tracker: -1. **Before starting a task**: Mark it as "in_progress" in Active Tasks +1. **Before starting a round**: Re-anchor on the original plan and current round contract +2. 
**Before starting a task**: Mark the relevant mainline task as "in_progress" in Active Tasks - Confirm Tag/Owner routing is correct before execution -2. **After completing a task**: Move it to "Completed and Verified" with evidence (but mark as "pending verification") -3. **If you discover the plan has errors**: +3. **Active Tasks** are MAINLINE tasks only - side issues do not belong there +4. **Blocking Side Issues** are reserved for issues that truly stop mainline progress +5. **Queued Side Issues** are non-blocking and must not take over the round +6. **After completing a mainline task**: Move it to "Completed and Verified" with evidence (but mark as "pending verification") +7. **If you discover the plan has errors**: - Do NOT silently change direction - Add entry to "Plan Evolution Log" with justification - Explain how the change still serves the Ultimate Goal -4. **If you need to defer a task**: +8. **If you need to defer a task**: - Move it to "Explicitly Deferred" section - Provide strong justification - Explain impact on Acceptance Criteria -5. **If you discover new issues**: Add to "Open Issues" table +9. **If you discover new issues**: + - Add to "Blocking Side Issues" only if mainline progress is blocked + - Otherwise add to "Queued Side Issues" or keep them as \`[queued]\` tasks/backlog --- @@ -1197,8 +1426,9 @@ Note: You MUST NOT try to exit \`start-rlcr-loop\` loop by lying or edit loop st After completing the work, please: 0. If you have access to the \`code-simplifier\` agent, use it to review and optimize the code you just wrote 1. Finalize @$GOAL_TRACKER_FILE (this is Round 0, so you are initializing it - see "Goal Tracker Setup" above) -2. Commit your changes with a descriptive commit message -3. Write your work summary into @$SUMMARY_PATH +2. Write your round contract into @$ROUND_CONTRACT_PATH +3. Commit your changes with a descriptive commit message +4. 
Write your work summary into @$SUMMARY_PATH EOF # Add push instruction only if push_every_round is true diff --git a/tests/robustness/test-goal-tracker-robustness.sh b/tests/robustness/test-goal-tracker-robustness.sh index fe4c025b..88eda6fd 100755 --- a/tests/robustness/test-goal-tracker-robustness.sh +++ b/tests/robustness/test-goal-tracker-robustness.sh @@ -50,6 +50,16 @@ parse_result() { esac } +parse_issue_result() { + local result="$1" + local field="$2" + case "$field" in + blocking_issues) echo "$result" | cut -d'|' -f1 ;; + queued_issues) echo "$result" | cut -d'|' -f2 ;; + open_issues) echo "$result" | cut -d'|' -f3 ;; + esac +} + # ======================================== # Positive Tests - Valid Goal Tracker # ======================================== @@ -438,6 +448,55 @@ else fail "Deferred tasks count" "2" "$DEFERRED_TASKS" fi +# Test 15b: Distinguish blocking vs queued issues in new schema +echo "" +echo "Test 15b: Distinguish blocking vs queued issues" +cat > "$TEST_DIR/goal-tracker-issue-breakdown.md" << 'EOF' +# Goal Tracker + +### Acceptance Criteria + +- AC-1: Test + +--- + +### Blocking Side Issues + +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| +| Failing review item | 2 | AC-1 | Fix immediately | + +### Queued Side Issues + +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +| Cleanup follow-up | 2 | Cosmetic only | Next refactor | +| Extra test hardening | 3 | Current AC already met | Regression appears | +EOF + +ISSUE_RESULT=$(humanize_parse_goal_tracker_issue_counts "$TEST_DIR/goal-tracker-issue-breakdown.md") +BLOCKING_ISSUES=$(parse_issue_result "$ISSUE_RESULT" blocking_issues) +QUEUED_ISSUES=$(parse_issue_result "$ISSUE_RESULT" queued_issues) +OPEN_ISSUES=$(parse_issue_result "$ISSUE_RESULT" open_issues) +if [[ "$BLOCKING_ISSUES" == "1" ]] && [[ "$QUEUED_ISSUES" == "2" ]] && [[ 
"$OPEN_ISSUES" == "3" ]]; then + pass "Separates blocking and queued issues in new schema" +else + fail "Issue breakdown" "1 blocking, 2 queued, 3 total" "$ISSUE_RESULT" +fi + +# Test 15c: Legacy open issues fallback maps to blocking count +echo "" +echo "Test 15c: Legacy open issues fallback maps to blocking count" +ISSUE_RESULT=$(humanize_parse_goal_tracker_issue_counts "$TEST_DIR/goal-tracker-issues.md") +BLOCKING_ISSUES=$(parse_issue_result "$ISSUE_RESULT" blocking_issues) +QUEUED_ISSUES=$(parse_issue_result "$ISSUE_RESULT" queued_issues) +OPEN_ISSUES=$(parse_issue_result "$ISSUE_RESULT" open_issues) +if [[ "$BLOCKING_ISSUES" == "2" ]] && [[ "$QUEUED_ISSUES" == "0" ]] && [[ "$OPEN_ISSUES" == "2" ]]; then + pass "Legacy open issues fallback treated as blocking" +else + fail "Legacy issue fallback" "2 blocking, 0 queued, 2 total" "$ISSUE_RESULT" +fi + # Test 16: File with only headers (no content) echo "" echo "Test 16: File with only section headers" diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 8f302bb4..5e8413a6 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -317,8 +317,10 @@ echo "" # Test 12: Bash validator blocks state.md modification attempts echo "Test 12: Bash validator blocks state.md modification" # Create RLCR state for the test -mkdir -p "$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00" -cat > "$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00/state.md" << 'EOF' +HOOK_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00" +OLD_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2026-01-19_11-00-00" +mkdir -p "$HOOK_LOOP_DIR" +cat > "$HOOK_LOOP_DIR/state.md" << 'EOF' --- current_round: 1 max_iterations: 42 @@ -333,6 +335,54 @@ review_started: false plan_tracked: false --- EOF +cat > "$HOOK_LOOP_DIR/goal-tracker.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Keep mainline aligned. 
+ +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Keep AC-1 moving | AC-1 | pending | - | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +EOF +mkdir -p "$OLD_LOOP_DIR" +cat > "$OLD_LOOP_DIR/goal-tracker.md" << 'EOF' +# Old Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Old session tracker. + +### Acceptance Criteria +- AC-1: Old session only. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 0) +EOF # Try to modify state.md - this SHOULD be blocked JSON='{"tool_name":"Bash","tool_input":{"command":"echo hacked >> '"$TEST_DIR"'/.humanize/rlcr/2026-01-19_12-00-00/state.md"}}' set +e @@ -366,9 +416,143 @@ else fail "Goal-tracker.md modification" "exit 2 (blocked)" "exit $EXIT_CODE, result: $RESULT" fi -# Test 12c: Unrelated dangerous commands are allowed through (sandbox handles security) +# Test 12c: Write validator allows mutable goal-tracker updates after round 0 +echo "" +echo "Test 12c: Write validator allows mutable goal-tracker updates after round 0" +cat > "$TEST_DIR/goal-tracker-updated.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Keep mainline aligned. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. 
+ +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| +| failing test for AC-1 | 1 | AC-1 | fix before exit | + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +EOF +UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-updated.md") +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write allows mutable goal-tracker updates after round 0" +else + fail "Goal-tracker mutable write" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12d: Write validator blocks immutable goal-tracker changes after round 0 +echo "" +echo "Test 12d: Write validator blocks immutable goal-tracker changes after round 0" +cat > "$TEST_DIR/goal-tracker-bad.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Change the goal entirely. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) +EOF +UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-bad.md") +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Write blocks immutable goal-tracker changes after round 0" +else + fail "Goal-tracker immutable write" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12e: Edit validator allows mutable goal-tracker edits after round 0 +echo "" +echo "Test 12e: Edit validator allows mutable goal-tracker edits after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":"| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored |"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows mutable goal-tracker updates after round 0" +else + fail "Goal-tracker mutable edit" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12ea: Edit validator allows mutable goal-tracker deletions after round 0 +echo "" +echo "Test 12ea: Edit validator allows mutable deletions after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":""}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows mutable goal-tracker deletions after round 0" +else + fail "Goal-tracker mutable delete" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12f: Edit validator blocks immutable goal-tracker edits after round 0 +echo "" +echo "Test 12f: Edit validator blocks immutable goal-tracker edits after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"Keep mainline aligned.","new_string":"Change the goal entirely."}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Edit blocks immutable goal-tracker updates after round 0" +else + fail "Goal-tracker immutable edit" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12g: Read validator blocks old-session goal tracker +echo "" +echo "Test 12g: Read validator blocks old-session goal tracker" +JSON='{"tool_name":"Read","tool_input":{"file_path":"'"$OLD_LOOP_DIR"'/goal-tracker.md"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Read blocks old-session goal-tracker.md" +else + fail "Goal-tracker old-session read" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12h: Unrelated dangerous commands are allowed through (sandbox handles security) echo "" -echo "Test 12c: Unrelated dangerous commands allowed through (sandbox responsibility)" +echo "Test 12h: Unrelated dangerous commands allowed through (sandbox responsibility)" JSON='{"tool_name":"Bash","tool_input":{"command":"cat /tmp/test; rm -rf /"}}' set +e RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index 13fe21bc..b4ee9299 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -1046,6 +1046,32 @@ else fail "--skip-impl goal-tracker" "goal-tracker.md exists" "not found" fi +# Test 44b: --skip-impl creates round-0-contract.md +echo "" +echo "Test 44b: --skip-impl creates round-0-contract.md" +if [[ -n "$LOOP_DIR" ]] && [[ -f "$LOOP_DIR/round-0-contract.md" ]]; then + if grep -qi "Mainline Objective" "$LOOP_DIR/round-0-contract.md"; then + pass "--skip-impl creates round-0-contract.md with mainline objective" + else + fail "--skip-impl round contract content" "Mainline Objective text" "$(cat "$LOOP_DIR/round-0-contract.md")" + fi +else + fail "--skip-impl round contract" "round-0-contract.md exists" "not found" +fi + +# Test 44c: --skip-impl prompt references the round contract +echo "" +echo "Test 44c: --skip-impl prompt references round-0-contract.md" +if [[ -n "$LOOP_DIR" ]] && [[ -f "$LOOP_DIR/round-0-prompt.md" ]]; then + if grep -q "round-0-contract.md" "$LOOP_DIR/round-0-prompt.md"; then + pass "--skip-impl prompt references round-0-contract.md" + else + fail "--skip-impl prompt contract reference" "prompt mentions 
round-0-contract.md" "$(cat "$LOOP_DIR/round-0-prompt.md")" + fi +else + fail "--skip-impl prompt contract reference" "round-0-prompt.md exists" "not found" +fi + # Test 45: --skip-impl with plan file still works echo "" echo "Test 45: --skip-impl with plan file still works" @@ -1074,6 +1100,44 @@ else fi fi +LOOP_DIR_45=$(find "$TEST_DIR/repo45/.humanize/rlcr" -maxdepth 1 -type d -name "20*" 2>/dev/null | head -1) + +echo "" +echo "Test 45b: --skip-impl with plan file preserves plan goal in goal-tracker" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/goal-tracker.md" ]]; then + if grep -q "Test the setup script robustness" "$LOOP_DIR_45/goal-tracker.md"; then + pass "--skip-impl with plan preserves plan goal anchor" + else + fail "--skip-impl plan goal anchor" "goal-tracker contains plan goal" "$(cat "$LOOP_DIR_45/goal-tracker.md")" + fi +else + fail "--skip-impl plan goal anchor" "goal-tracker.md exists" "not found" +fi + +echo "" +echo "Test 45c: --skip-impl with plan file prompt references original plan" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/round-0-prompt.md" ]]; then + if grep -q "@plan.md" "$LOOP_DIR_45/round-0-prompt.md"; then + pass "--skip-impl with plan prompt references original plan" + else + fail "--skip-impl plan prompt anchor" "round-0-prompt references @plan.md" "$(cat "$LOOP_DIR_45/round-0-prompt.md")" + fi +else + fail "--skip-impl plan prompt anchor" "round-0-prompt.md exists" "not found" +fi + +echo "" +echo "Test 45d: --skip-impl with plan file contract references original plan alignment" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/round-0-contract.md" ]]; then + if grep -qi "aligned with @plan.md" "$LOOP_DIR_45/round-0-contract.md"; then + pass "--skip-impl with plan contract references original plan" + else + fail "--skip-impl plan contract anchor" "round-0-contract references @plan.md" "$(cat "$LOOP_DIR_45/round-0-contract.md")" + fi +else + fail "--skip-impl plan contract anchor" "round-0-contract.md exists" "not 
found" +fi + # ======================================== # Dependency Check Tests # ======================================== diff --git a/tests/robustness/test-state-file-robustness.sh b/tests/robustness/test-state-file-robustness.sh index ae6d0e7f..83f91824 100755 --- a/tests/robustness/test-state-file-robustness.sh +++ b/tests/robustness/test-state-file-robustness.sh @@ -473,6 +473,55 @@ else fail "Parses state with min full_review_round" "return 0" "returned non-zero" fi +# Test 22: State file with drift-tracking fields +echo "" +echo "Test 22: State file with drift-tracking fields" +cat > "$TEST_DIR/state-drift-fields.md" << 'EOF' +--- +current_round: 4 +max_iterations: 12 +review_started: false +base_branch: main +mainline_stall_count: 2 +last_mainline_verdict: stalled +drift_status: replan_required +--- +EOF + +if parse_state_file "$TEST_DIR/state-drift-fields.md"; then + if [[ "$STATE_MAINLINE_STALL_COUNT" == "2" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "Parses drift-tracking fields correctly" + else + fail "Parses drift-tracking fields" "stall=2 verdict=stalled drift=replan_required" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" + fi +else + fail "Parses state with drift-tracking fields" "return 0" "returned non-zero" +fi + +# Test 23: Missing drift-tracking fields use safe defaults +echo "" +echo "Test 23: Missing drift-tracking fields use safe defaults" +cat > "$TEST_DIR/state-no-drift-fields.md" << 'EOF' +--- +current_round: 1 +max_iterations: 8 +review_started: false +base_branch: main +--- +EOF + +if parse_state_file "$TEST_DIR/state-no-drift-fields.md"; then + if [[ "$STATE_MAINLINE_STALL_COUNT" == "0" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "unknown" ]] && [[ "$STATE_DRIFT_STATUS" == "normal" ]]; then + pass "Uses safe defaults for drift-tracking fields" + else + fail "Default drift-tracking fields" "stall=0 
verdict=unknown drift=normal" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" + fi +else + fail "Parses state without drift-tracking fields" "return 0" "returned non-zero" +fi + # ======================================== # Summary # ======================================== diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 1c685109..de52bb6c 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -455,6 +455,9 @@ ask_codex_question: false full_review_round: 5 session_id: agent_teams: $agent_teams +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal --- STATE_EOF @@ -483,6 +486,16 @@ GT_EOF Implemented features as requested. SUM_EOF + cat > "$LOOP_DIR/round-${round}-contract.md" << CONTRACT_EOF +# Round $round Contract + +- Mainline Objective: Continue the requested implementation round +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: advance the mainline objective without drift +CONTRACT_EOF + # Set up isolated cache directory export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" @@ -536,6 +549,8 @@ MOCK_EOF setup_stophook_test 3 "true" "false" setup_mock_codex_impl_feedback "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues found: - Issue 1: Missing error handling @@ -566,6 +581,46 @@ else fail "impl phase with agent_teams=true: next-round prompt contains agent-teams continuation" "round-4-prompt.md exists" "not found (hook exit=$HOOK_EXIT)" fi +# ======================================== +# Test: Drift recovery prompt still preserves agent-teams continuation +# ======================================== + +setup_stophook_test 3 "true" "false" +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" 
+setup_mock_codex_impl_feedback "## Review Feedback + +Mainline Progress Verdict: STALLED + +- Mainline gap: AC-1 still has no stable implementation +- Blocking side issue: the team is repeating the same non-advancing fix pattern + +Recover the mainline before trying again. + +CONTINUE" + +HOOK_INPUT='{"stop_hook_active": false, "transcript": [], "session_id": ""}' +set +e +RESULT=$(echo "$HOOK_INPUT" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$STOP_HOOK" 2>/dev/null) +HOOK_EXIT=$? +set -e + +NEXT_PROMPT="$LOOP_DIR/round-4-prompt.md" +if [[ -f "$NEXT_PROMPT" ]]; then + if grep -q "Drift Recovery Mode" "$NEXT_PROMPT"; then + pass "drift recovery prompt generated for stalled mainline" + else + fail "drift recovery prompt generated for stalled mainline" "Drift Recovery Mode" "not found" + fi + if grep -qi "Agent Teams" "$NEXT_PROMPT"; then + pass "drift recovery prompt keeps agent-teams continuation" + else + fail "drift recovery prompt keeps agent-teams continuation" "agent-teams text in prompt" "not found" + fi +else + fail "drift recovery prompt keeps agent-teams continuation" "round-4-prompt.md exists" "not found (hook exit=$HOOK_EXIT)" +fi + # ======================================== # Test: Implementation phase with agent_teams=false has no continuation # ======================================== @@ -573,6 +628,8 @@ fi setup_stophook_test 3 "false" "false" setup_mock_codex_impl_feedback "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues found: - Issue 1: Missing error handling diff --git a/tests/test-allowlist-validators.sh b/tests/test-allowlist-validators.sh index 6c604965..6c80022f 100755 --- a/tests/test-allowlist-validators.sh +++ b/tests/test-allowlist-validators.sh @@ -4,9 +4,9 @@ # # Tests: # - is_allowlisted_file() function in loop-common.sh -# - Read validator allowlist for todos and summaries -# - Write validator allowlist for todos and summaries -# - Edit validator allowlist for todos and summaries +# - Read validator allowlist for 
todos, summaries, and contracts +# - Write validator allowlist for todos, summaries, and contracts +# - Edit validator allowlist for todos, summaries, and contracts # - Bash validator allowlist for todos files (path-restricted) # @@ -117,6 +117,14 @@ else fail "round-2-summary.md blocked" "false" "true" fi +# Test 6b: Non-allowlisted file - round-0-contract.md +echo "Test 6b: round-0-contract.md is NOT allowlisted" +if ! is_allowlisted_file "$ACTIVE_LOOP_DIR/round-0-contract.md" "$ACTIVE_LOOP_DIR"; then + pass "round-0-contract.md is NOT allowlisted" +else + fail "round-0-contract.md blocked" "false" "true" +fi + # Test 7: Wrong directory - allowlisted filename but wrong path echo "Test 7: round-1-todos.md in wrong directory is NOT allowlisted" if ! is_allowlisted_file "/other/path/round-1-todos.md" "$ACTIVE_LOOP_DIR"; then @@ -158,6 +166,19 @@ else fail "Write validator round-0-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 9b: Write validator allows current round contract +echo "Test 9b: Write validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write validator allows round-5-contract.md" +else + fail "Write validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 10: Write validator blocks round-3-todos.md (not in allowlist) echo "Test 10: Write validator blocks round-3-todos.md" HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-todos.md"}}' @@ -184,6 +205,19 @@ else fail "Write validator round-2-summary.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" fi +# Test 11b: Write validator blocks stale round contract +echo "Test 11b: Write validator blocks round-3-contract.md" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Write validator blocks round-3-contract.md" +else + fail "Write validator round-3-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "" echo "=== Test: Edit Validator Allowlist ===" echo "" @@ -214,6 +248,32 @@ else fail "Edit validator round-1-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 13b: Edit validator allows current round contract +echo "Test 13b: Edit validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit validator allows round-5-contract.md" +else + fail "Edit validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 13c: Edit validator blocks stale round contract +echo "Test 13c: Edit validator blocks round-0-contract.md" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-0-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Edit validator blocks round-0-contract.md" +else + fail "Edit validator round-0-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 14: Edit validator blocks round-4-todos.md echo "Test 14: Edit validator blocks round-4-todos.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-4-todos.md"}}' @@ -257,6 +317,19 @@ else fail "Read validator round-0-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 16b: Read validator allows current round contract +echo "Test 16b: Read validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Read validator allows round-5-contract.md" +else + fail "Read validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 17: Read validator blocks round-3-todos.md echo "Test 17: Read validator blocks round-3-todos.md" HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-todos.md"}}' @@ -283,6 +356,19 @@ else fail "Read validator round-3-summary.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" fi +# Test 18b: Read validator blocks stale round contract +echo "Test 18b: Read validator blocks round-3-contract.md" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Read validator blocks round-3-contract.md" +else + fail "Read validator round-3-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "" echo "=== Test: Bash Validator Allowlist (Path-Restricted) ===" echo "" @@ -313,6 +399,19 @@ else fail "Bash validator round-2-todos.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 20b: Bash validator blocks round-5-contract.md +echo "Test 20b: Bash validator blocks round-5-contract.md" +HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > '$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Bash validator blocks round-5-contract.md" +else + fail "Bash validator round-5-contract.md" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 21: Bash validator blocks round-1-todos.md in wrong directory echo "Test 21: Bash validator blocks round-1-todos.md in wrong directory" HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > /tmp/round-1-todos.md"}}' diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 96890a41..483ae665 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -200,6 +200,9 @@ plan_tracked: false start_branch: $current_branch base_branch: main review_started: false +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal started_at: 2024-01-01T12:00:00Z --- EOF @@ -223,6 +226,16 @@ Test finalize phase | Task | Target AC | Status | |------|-----------|--------| | Test | AC-1 | completed | +EOF + + cat > "$LOOP_DIR/round-${round}-contract.md" << EOF +# Round $round Contract + +- Mainline Objective: Verify finalize phase coverage +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: current round artifacts are complete EOF } @@ -366,6 +379,18 @@ else fail "Write validator finalize-state.md" "exit 2 with finalize error" "exit $EXIT_CODE, output: $RESULT" fi +echo "T-NEG-5aa: Write validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Write validator blocks finalize-phase round contract" +else + fail "Write validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "T-NEG-5b: Edit validator blocks finalize-state.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/finalize-state.md"}}' set +e @@ -378,6 +403,18 @@ else fail "Edit validator finalize-state.md" "exit 2 with finalize error" "exit $EXIT_CODE, output: $RESULT" fi +echo "T-NEG-5bb: Edit validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Edit validator blocks finalize-phase round contract" +else + fail "Edit validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "T-NEG-5c: Bash validator blocks finalize-state.md modification" HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > '$LOOP_DIR'/finalize-state.md"}}' set +e @@ -513,6 +550,8 @@ setup_test_repo setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 setup_mock_codex "All requirements met. +Mainline Progress Verdict: ADVANCED + COMPLETE" # Create summary for current round @@ -571,6 +610,8 @@ setup_test_repo setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 setup_mock_codex_review_failure "All requirements met. +Mainline Progress Verdict: ADVANCED + COMPLETE" 1 # Create summary for current round @@ -630,6 +671,8 @@ setup_test_repo setup_loop_dir 4 10 # current_round: 4, max_iterations: 10 setup_mock_codex_review_empty_stdout "All requirements met. 
+Mainline Progress Verdict: ADVANCED + COMPLETE" # Create summary for current round @@ -752,6 +795,8 @@ setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 # Create a mock Codex that outputs review feedback (not COMPLETE) setup_mock_codex "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues need to be addressed: - Issue 1: Fix the bug in function X - Issue 2: Add tests for edge case Y @@ -813,6 +858,158 @@ else fail "Review feedback in output" "output contains 'Issue 1' from Codex review" "output does not contain expected feedback" fi +echo "" +echo "=== T-POS-6 / T-NEG-10: Mainline Drift State Machine ===" +echo "" + +# T-POS-6: Two consecutive stalled rounds trigger drift recovery prompt +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +Mainline Progress Verdict: STALLED + +- Mainline gap: AC-1 still lacks a passing implementation path +- Blocking side issue: current approach keeps looping on the same failing path + +Please recover the mainline before trying again. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +Tried another implementation pass, but AC-1 is still not advancing. +EOF + +TRANSCRIPT_FILE="$TEST_DIR/transcript.jsonl" +cat > "$TRANSCRIPT_FILE" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "[mainline] Recover AC-1", "status": "completed", "activeForm": "Recovering AC-1"}]}}]}} +EOF + +echo "T-POS-6: Two stalled rounds trigger drift recovery prompt" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? 
+set -e + +if echo "$RESULT" | grep -q '"decision".*block' && [[ -f "$LOOP_DIR/round-4-prompt.md" ]]; then + pass "Drift recovery round blocks exit and creates next prompt" +else + fail "Drift recovery prompt creation" "block with round-4 prompt" "exit $EXIT_CODE, output: $RESULT" +fi + +if grep -q "Drift Recovery Mode" "$LOOP_DIR/round-4-prompt.md"; then + pass "Drift recovery prompt uses special replan template" +else + fail "Drift recovery prompt template" "Drift Recovery Mode in prompt" "$(cat "$LOOP_DIR/round-4-prompt.md")" +fi + +parse_state_file "$LOOP_DIR/state.md" +if [[ "$STATE_CURRENT_ROUND" == "4" ]] && [[ "$STATE_MAINLINE_STALL_COUNT" == "2" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "State records drift recovery requirement after second stalled round" +else + fail "Drift recovery state update" "round=4 stall=2 verdict=stalled drift=replan_required" \ + "round=$STATE_CURRENT_ROUND stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + +# T-NEG-10a: Missing Mainline Progress Verdict blocks exit and preserves state +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +- Mainline gap: AC-1 still lacks a passing implementation path +- Blocking side issue: current approach keeps looping on the same failing path + +Please restate the mainline more clearly. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +Tried another implementation pass, but the review omitted the verdict line. 
+EOF + +echo "T-NEG-10a: Missing Mainline Progress Verdict blocks exit" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if echo "$RESULT" | grep -q '"decision".*block' && echo "$RESULT" | grep -qi "verdict"; then + pass "Missing Mainline Progress Verdict blocks exit" +else + fail "Missing Mainline Progress Verdict" "block with verdict error" "exit $EXIT_CODE, output: $RESULT" +fi + +if [[ ! -f "$LOOP_DIR/round-4-prompt.md" ]]; then + pass "Missing verdict does not generate next-round prompt" +else + fail "Missing verdict prompt generation" "no round-4 prompt" "$(cat "$LOOP_DIR/round-4-prompt.md")" +fi + +parse_state_file "$LOOP_DIR/state.md" +if [[ "$STATE_CURRENT_ROUND" == "3" ]] && [[ "$STATE_MAINLINE_STALL_COUNT" == "1" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "normal" ]]; then + pass "Missing verdict preserves prior drift state" +else + fail "Missing verdict state preservation" "round=3 stall=1 verdict=stalled drift=normal" \ + "round=$STATE_CURRENT_ROUND stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + +# T-NEG-10: Third consecutive stalled/regressed round stops the loop +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 2/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" +perl -0pi -e 's/drift_status: normal/drift_status: replan_required/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +Mainline Progress Verdict: REGRESSED + +- Mainline gap: this round moved farther from AC-1 +- Blocking side issue: recent fixes keep undoing the prior mainline path + +Stop and replan. 
+ +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +The latest attempt regressed the mainline objective again. +EOF + +echo "T-NEG-10: Third stalled/regressed round triggers circuit breaker" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if [[ -f "$LOOP_DIR/stop-state.md" ]] && echo "$RESULT" | grep -qi "drift"; then + pass "Third stalled/regressed round stops the loop with drift message" +else + fail "Drift circuit breaker" "stop-state.md and drift message" "exit $EXIT_CODE, files: $(ls "$LOOP_DIR"/*state*.md 2>/dev/null || echo 'none'), output: $RESULT" +fi + +parse_state_file "$LOOP_DIR/stop-state.md" +if [[ "$STATE_MAINLINE_STALL_COUNT" == "3" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "regressed" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "Stopped loop preserves final drift state" +else + fail "Preserved drift state on stop" "stall=3 verdict=regressed drift=replan_required" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + echo "" echo "=== Validator Finalize Phase State Parsing Tests ===" echo "" @@ -850,6 +1047,18 @@ else fail "Read validator finalize-state.md parsing" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +echo "Test: Read validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Read validator blocks finalize-phase round contract" +else + fail "Read validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "Test: Plan-file validator parses finalize-state.md correctly" # The plan-file validator should not error when only finalize-state.md exists HOOK_INPUT='{"prompt": "test prompt"}' diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index d2e8af6f..c345d944 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -60,6 +60,21 @@ setup_mock_codex # Default branch name (set after first git init) DEFAULT_BRANCH="" +create_round_contract() { + local loop_dir="$1" + local round="$2" + + cat > "$loop_dir/round-${round}-contract.md" << EOF +# Round $round Contract + +- Mainline Objective: Keep plan-file integrity checks aligned +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: current round artifacts are present and coherent +EOF +} + setup_test_loop() { cd "$TEST_DIR" @@ -80,6 +95,7 @@ setup_test_loop() { # Create loop directory structure LOOP_DIR="$TEST_DIR/.humanize/rlcr/2024-01-01_12-00-00" + rm -rf "$LOOP_DIR" mkdir -p "$LOOP_DIR" # Create plan file (gitignored) @@ -91,7 +107,12 @@ Test the RLCR loop ## Requirements - Requirement 1 EOF - echo "plans/" >> .gitignore + cat >> .gitignore << 'EOF' +plans/ +.humanize* +.cache/ +bin/ +EOF git add .gitignore git -c commit.gpgsign=false commit -q -m "Add gitignore" @@ -111,6 +132,8 @@ base_branch: $CURRENT_BRANCH review_started: false --- EOF + + create_round_contract "$LOOP_DIR" 0 } echo "=== Test: UserPromptSubmit Hook ===" @@ -466,6 +489,38 @@ else fail "Stop hook YAML parsing" "no YAML parse errors" "output: $RESULT" fi +# Test 8.8b: Stop hook blocks when round contract is missing +echo "Test 8.8b: Stop hook blocks when round contract is 
missing" +setup_test_loop +rm -f "$LOOP_DIR/round-0-contract.md" +cat > "$LOOP_DIR/round-0-summary.md" << 'EOF' +# Summary +Work done. +EOF +cat > "$LOOP_DIR/goal-tracker.md" << 'EOF' +# Goal Tracker +## IMMUTABLE SECTION +### Ultimate Goal +Test goal +### Acceptance Criteria +- Criterion 1 +## MUTABLE SECTION +### Plan Version: 1 (Updated: Round 0) +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| Task 1 | AC1 | done | - | +EOF +set +e +RESULT=$(echo '{}' | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e +if echo "$RESULT" | grep -q '"decision"' && echo "$RESULT" | grep -qi "contract"; then + pass "Stop hook blocks when round contract is missing" +else + fail "Stop hook missing round contract" "block with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 8.9: Hook handles plan_file path with hyphens correctly echo "Test 8.9: Hook handles plan_file with hyphens in path" setup_test_loop @@ -642,6 +697,7 @@ cat > "$TRACKED_LOOP_DIR/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$TRACKED_LOOP_DIR" 0 cat > "$TRACKED_LOOP_DIR/goal-tracker.md" << 'EOF' # Goal Tracker ## IMMUTABLE SECTION @@ -738,6 +794,7 @@ cat > "$TRACKED_LOOP_DIR/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$TRACKED_LOOP_DIR" 0 cat > "$TRACKED_LOOP_DIR/goal-tracker.md" << 'EOF' # Goal Tracker ## IMMUTABLE SECTION @@ -822,6 +879,7 @@ cat > "$LOOP_DIR_14_1/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_1" 0 # Goal tracker with ONLY Ultimate Goal placeholder (AC and Tasks are filled) cat > "$LOOP_DIR_14_1/goal-tracker.md" << 'EOF' # Goal Tracker @@ -893,6 +951,7 @@ cat > "$LOOP_DIR_14_2/round-0-summary.md" << 'EOF' # Summary Work done. 
EOF +create_round_contract "$LOOP_DIR_14_2" 0 # Goal tracker with ONLY AC placeholder (Goal and Tasks are filled) cat > "$LOOP_DIR_14_2/goal-tracker.md" << 'EOF' # Goal Tracker @@ -964,6 +1023,7 @@ cat > "$LOOP_DIR_14_3/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_3" 0 # Goal tracker with ONLY Active Tasks placeholder (Goal and AC are filled) cat > "$LOOP_DIR_14_3/goal-tracker.md" << 'EOF' # Goal Tracker @@ -1033,6 +1093,7 @@ cat > "$LOOP_DIR_14_4/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_4" 0 # Goal tracker with ALL placeholders cat > "$LOOP_DIR_14_4/goal-tracker.md" << 'EOF' # Goal Tracker diff --git a/tests/test-task-tag-routing.sh b/tests/test-task-tag-routing.sh index ae9365f7..3d4bc0fe 100755 --- a/tests/test-task-tag-routing.sh +++ b/tests/test-task-tag-routing.sh @@ -180,6 +180,15 @@ Keep routing behavior stable. | Task | Target AC | Status | Tag | Owner | Notes | |------|-----------|--------|-----|-------|-------| | Keep routing note | AC-1 | in_progress | analyze | codex | - +EOF + cat > "$loop_dir/round-0-contract.md" << 'EOF' +# Round 0 Contract + +- Mainline Objective: Keep routing behavior stable while addressing the current review feedback. +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: Follow-up prompt is generated with routing guidance intact. EOF cat > "$loop_dir/round-0-summary.md" << 'EOF' # Round 0 Summary @@ -197,6 +206,8 @@ setup_test_dir setup_stophook_repo "$TEST_DIR/hook-routing" create_mock_codex "$TEST_DIR/hook-routing/bin" "## Review Feedback +Mainline Progress Verdict: STALLED + Issue remains unresolved. 
CONTINUE" diff --git a/tests/test-todo-checker.sh b/tests/test-todo-checker.sh index b3e7b072..18b076a2 100755 --- a/tests/test-todo-checker.sh +++ b/tests/test-todo-checker.sh @@ -157,6 +157,36 @@ else fail "In-progress status" "exit 1" "exit $EXIT_CODE" fi +# Test 8b: Queued TodoWrite item does NOT block exit +echo "Test 8b: Queued TodoWrite item" +cat > "$TEST_DIR/transcript-queued.jsonl" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "[queued] Cleanup follow-up", "status": "pending"}]}}]}} +EOF +set +e +RESULT=$(echo "{\"transcript_path\": \"$TEST_DIR/transcript-queued.jsonl\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Queued TodoWrite item exits 0" +else + fail "Queued TodoWrite item" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 8c: Lane tags in the middle of TodoWrite content do NOT downgrade blocking tasks +echo "Test 8c: Inline queued tag does not bypass TodoWrite blocker" +cat > "$TEST_DIR/transcript-inline-tag.jsonl" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "Fix docs mentioning [queued] follow-ups", "status": "pending"}]}}]}} +EOF +set +e +RESULT=$(echo "{\"transcript_path\": \"$TEST_DIR/transcript-inline-tag.jsonl\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Inline queued tag still blocks TodoWrite item" +else + fail "Inline queued TodoWrite item" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + # ======================================== # Test Group 3: Transcript Format Variations # ======================================== @@ -357,6 +387,57 @@ else fail "Task with in_progress status" "exit 1" "exit $EXIT_CODE, output: $RESULT" fi +# Test 19b: Queued file-based task does NOT block exit +echo "Test 19b: Queued task does not block" +MOCK_SESSION_19B="session-19b" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19B" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19B/task-1.json" << 'EOF' +{"subject": "[queued] Follow-up cleanup", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19B\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Queued task exits 0" +else + fail "Queued task" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 19c: Explicit blocking tag still blocks +echo "Test 19c: Blocking task still blocks" +MOCK_SESSION_19C="session-19c" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19C" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19C/task-1.json" << 'EOF' +{"subject": "[blocking] Fix failing test", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19C\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Blocking task exits 1 with lane marker" +else + fail "Blocking task" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 19d: Inline queued tag in task body does NOT downgrade blocking tasks +echo "Test 19d: Inline queued tag in task body does not bypass blocker" +MOCK_SESSION_19D="session-19d" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19D" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19D/task-1.json" << 'EOF' +{"subject": "Triage review fallout", "description": "Notes mention [queued] cleanup but this task is still active", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19D\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Inline queued tag still blocks file-based task" +else + fail "Inline queued file-based task" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 20: Multiple tasks, one incomplete echo "Test 20: Multiple tasks, one incomplete" MOCK_SESSION_20="session-20" From cfc46d626e32e832e3ef2d6ec0fb9d2744bee33f Mon Sep 17 00:00:00 2001 From: Emin Date: Tue, 17 Mar 2026 15:43:41 +0800 Subject: [PATCH 20/97] Use portable shebang across all shell scripts Replace hardcoded #!/bin/bash with #!/usr/bin/env bash for better portability across different Unix systems where bash may be installed in non-standard locations. 
Co-Authored-By: Claude Opus 4.6 --- hooks/lib/loop-common.sh | 2 +- hooks/lib/template-loader.sh | 2 +- hooks/loop-bash-validator.sh | 2 +- hooks/loop-codex-stop-hook.sh | 2 +- hooks/loop-edit-validator.sh | 2 +- hooks/loop-plan-file-validator.sh | 2 +- hooks/loop-post-bash-hook.sh | 2 +- hooks/loop-read-validator.sh | 2 +- hooks/loop-write-validator.sh | 2 +- hooks/pr-loop-stop-hook.sh | 2 +- scripts/ask-codex.sh | 2 +- scripts/cancel-pr-loop.sh | 2 +- scripts/cancel-rlcr-loop.sh | 2 +- scripts/check-bot-reactions.sh | 2 +- scripts/check-pr-reviewer-status.sh | 2 +- scripts/fetch-pr-comments.sh | 2 +- scripts/humanize.sh | 2 +- scripts/install-skill.sh | 2 +- scripts/install-skills-codex.sh | 2 +- scripts/install-skills-kimi.sh | 2 +- scripts/lib/monitor-common.sh | 2 +- scripts/lib/monitor-skill.sh | 2 +- scripts/poll-pr-reviews.sh | 2 +- scripts/portable-timeout.sh | 2 +- scripts/rlcr-stop-gate.sh | 2 +- scripts/setup-pr-loop.sh | 2 +- scripts/setup-rlcr-loop.sh | 2 +- scripts/validate-gen-plan-io.sh | 2 +- tests/manual-monitor-test.sh | 2 +- tests/mocks/gh | 2 +- .../robustness/test-base-branch-detection.sh | 2 +- .../test-cancel-security-robustness.sh | 2 +- .../test-concurrent-state-robustness.sh | 2 +- .../test-git-operations-robustness.sh | 2 +- .../test-goal-tracker-robustness.sh | 2 +- .../robustness/test-hook-input-robustness.sh | 8 +-- .../robustness/test-hook-system-robustness.sh | 4 +- .../test-path-validation-robustness.sh | 4 +- tests/robustness/test-plan-file-robustness.sh | 4 +- tests/robustness/test-pr-loop-api-fetch.sh | 2 +- tests/robustness/test-pr-loop-api-poll.sh | 2 +- .../robustness/test-pr-loop-api-robustness.sh | 8 +-- tests/robustness/test-session-robustness.sh | 2 +- .../test-setup-scripts-robustness.sh | 39 +++++++++---- .../robustness/test-state-file-robustness.sh | 2 +- .../test-state-transition-robustness.sh | 2 +- .../test-template-error-robustness.sh | 2 +- .../test-template-stress-robustness.sh | 2 +- 
tests/robustness/test-timeout-robustness.sh | 2 +- tests/run-all-tests.sh | 4 +- tests/setup-fixture-mock-gh.sh | 4 +- tests/setup-monitor-test-env.sh | 2 +- tests/test-agent-teams.sh | 6 +- tests/test-allowlist-validators.sh | 2 +- tests/test-ansi-parsing.sh | 2 +- tests/test-ask-codex.sh | 4 +- tests/test-bash-validator-patterns.sh | 2 +- tests/test-cancel-signal-file.sh | 2 +- tests/test-codex-review-merge.sh | 2 +- tests/test-error-scenarios.sh | 2 +- tests/test-finalize-phase.sh | 10 ++-- tests/test-gen-plan.sh | 2 +- tests/test-helpers.sh | 2 +- tests/test-humanize-escape.sh | 2 +- tests/test-monitor-e2e-deletion.sh | 2 +- tests/test-monitor-e2e-real.sh | 10 ++-- tests/test-monitor-e2e-sigint.sh | 2 +- tests/test-monitor-runtime.sh | 12 ++-- tests/test-plan-file-hooks.sh | 4 +- tests/test-plan-file-validation.sh | 4 +- tests/test-pr-loop-1-scripts.sh | 2 +- tests/test-pr-loop-2-hooks.sh | 2 +- tests/test-pr-loop-3-stophook.sh | 2 +- tests/test-pr-loop-hooks.sh | 22 ++++---- tests/test-pr-loop-lib.sh | 6 +- tests/test-pr-loop-scripts.sh | 2 +- tests/test-pr-loop-stophook.sh | 56 +++++++++---------- tests/test-pr-loop-system.sh | 10 ++-- tests/test-pr-loop.sh | 2 +- tests/test-session-id.sh | 2 +- tests/test-skill-monitor.sh | 2 +- tests/test-state-exit-naming.sh | 2 +- tests/test-stop-gate.sh | 2 +- tests/test-template-loader.sh | 2 +- tests/test-template-references.sh | 2 +- tests/test-templates-comprehensive.sh | 2 +- tests/test-todo-checker.sh | 2 +- 87 files changed, 187 insertions(+), 168 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 2e7014c2..17bd1c3e 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Common functions for RLCR loop hooks # diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7f46853d..fdb488a6 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -1,4 +1,4 @@ -#!/bin/bash 
+#!/usr/bin/env bash # # Template loading functions for RLCR loop hooks # diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index a9c394f3..65f3ca56 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Bash commands for RLCR loop and PR loop # diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 0c1bd7b9..cad09969 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Stop Hook for RLCR loop # diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..e59821ff 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Edit paths for RLCR loop and PR loop # diff --git a/hooks/loop-plan-file-validator.sh b/hooks/loop-plan-file-validator.sh index 595d408a..4f336abc 100755 --- a/hooks/loop-plan-file-validator.sh +++ b/hooks/loop-plan-file-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # UserPromptSubmit hook for plan file validation during RLCR loop # diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index eeed7bde..a0cc8268 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PostToolUse Bash Hook for RLCR loop # diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..421a2143 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Read access for RLCR loop and PR loop files # diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..7c12b8a0 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -1,4 +1,4 
@@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Write paths for RLCR loop and PR loop # diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh index 86c8a7e0..18361001 100755 --- a/hooks/pr-loop-stop-hook.sh +++ b/hooks/pr-loop-stop-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Stop Hook for PR loop # diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index 3a07ab43..c9927162 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Ask Codex - One-shot consultation with Codex # diff --git a/scripts/cancel-pr-loop.sh b/scripts/cancel-pr-loop.sh index f01c606b..388b536f 100755 --- a/scripts/cancel-pr-loop.sh +++ b/scripts/cancel-pr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Cancel script for cancel-pr-loop # diff --git a/scripts/cancel-rlcr-loop.sh b/scripts/cancel-rlcr-loop.sh index 907b051e..bc0f2511 100755 --- a/scripts/cancel-rlcr-loop.sh +++ b/scripts/cancel-rlcr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Cancel script for cancel-rlcr-loop # diff --git a/scripts/check-bot-reactions.sh b/scripts/check-bot-reactions.sh index 74a3372c..e14861a6 100755 --- a/scripts/check-bot-reactions.sh +++ b/scripts/check-bot-reactions.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Check bot reactions on PR or comments # diff --git a/scripts/check-pr-reviewer-status.sh b/scripts/check-pr-reviewer-status.sh index cb3e31fd..e4915c99 100755 --- a/scripts/check-pr-reviewer-status.sh +++ b/scripts/check-pr-reviewer-status.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Check PR reviewer status for startup case determination # diff --git a/scripts/fetch-pr-comments.sh b/scripts/fetch-pr-comments.sh index 2dc996b8..b4e892b1 100755 --- a/scripts/fetch-pr-comments.sh +++ b/scripts/fetch-pr-comments.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Fetch PR comments from GitHub # diff --git a/scripts/humanize.sh 
b/scripts/humanize.sh index 3c6b4546..01c6e1b6 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # humanize.sh - Humanize shell utilities # Part of rc.d configuration # Compatible with both bash and zsh diff --git a/scripts/install-skill.sh b/scripts/install-skill.sh index 3e891b02..1c1f817d 100755 --- a/scripts/install-skill.sh +++ b/scripts/install-skill.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Install/upgrade Humanize skills for Kimi and/or Codex. # diff --git a/scripts/install-skills-codex.sh b/scripts/install-skills-codex.sh index 16b0375f..45d7476e 100755 --- a/scripts/install-skills-codex.sh +++ b/scripts/install-skills-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Convenience wrapper: install Humanize skills for Codex target. # diff --git a/scripts/install-skills-kimi.sh b/scripts/install-skills-kimi.sh index 7bcbb775..15a94dd4 100755 --- a/scripts/install-skills-kimi.sh +++ b/scripts/install-skills-kimi.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Convenience wrapper: install Humanize skills for Kimi target. 
# diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..0e6af09f 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # monitor-common.sh - Shared utilities for humanize monitor functions # diff --git a/scripts/lib/monitor-skill.sh b/scripts/lib/monitor-skill.sh index 16fd4936..bd839d59 100644 --- a/scripts/lib/monitor-skill.sh +++ b/scripts/lib/monitor-skill.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # monitor-skill.sh - Skill monitor for humanize # diff --git a/scripts/poll-pr-reviews.sh b/scripts/poll-pr-reviews.sh index 4292dd2d..282899bd 100755 --- a/scripts/poll-pr-reviews.sh +++ b/scripts/poll-pr-reviews.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Poll for new PR reviews from specified bots # diff --git a/scripts/portable-timeout.sh b/scripts/portable-timeout.sh index 318ef72e..2dcd9308 100755 --- a/scripts/portable-timeout.sh +++ b/scripts/portable-timeout.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Portable timeout wrapper for macOS/Linux compatibility # Usage: source portable-timeout.sh; run_with_timeout [args...] diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 306f875c..31616423 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Run RLCR stop-hook logic from non-hook environments (e.g. skill workflows). 
# diff --git a/scripts/setup-pr-loop.sh b/scripts/setup-pr-loop.sh index 85fabb9d..56ffeb2b 100755 --- a/scripts/setup-pr-loop.sh +++ b/scripts/setup-pr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Setup script for start-pr-loop # diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 27bb439c..1974c58e 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Setup script for start-rlcr-loop # diff --git a/scripts/validate-gen-plan-io.sh b/scripts/validate-gen-plan-io.sh index e024df8a..a566b18b 100755 --- a/scripts/validate-gen-plan-io.sh +++ b/scripts/validate-gen-plan-io.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-gen-plan-io.sh # Validates input and output paths for the gen-plan command # Exit codes: diff --git a/tests/manual-monitor-test.sh b/tests/manual-monitor-test.sh index f8d17e41..332b9548 100644 --- a/tests/manual-monitor-test.sh +++ b/tests/manual-monitor-test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Manual Test Script for tests # diff --git a/tests/mocks/gh b/tests/mocks/gh index 5cf767d1..d91a58c9 100755 --- a/tests/mocks/gh +++ b/tests/mocks/gh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Mock gh CLI for testing PR loop functionality # diff --git a/tests/robustness/test-base-branch-detection.sh b/tests/robustness/test-base-branch-detection.sh index 92ce1825..bb31cd3a 100755 --- a/tests/robustness/test-base-branch-detection.sh +++ b/tests/robustness/test-base-branch-detection.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for base branch auto-detection # diff --git a/tests/robustness/test-cancel-security-robustness.sh b/tests/robustness/test-cancel-security-robustness.sh index 524cc19e..7ecc3c3c 100755 --- a/tests/robustness/test-cancel-security-robustness.sh +++ b/tests/robustness/test-cancel-security-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # 
Robustness tests for cancel operation security # diff --git a/tests/robustness/test-concurrent-state-robustness.sh b/tests/robustness/test-concurrent-state-robustness.sh index 74ae84f6..ad72b3fc 100755 --- a/tests/robustness/test-concurrent-state-robustness.sh +++ b/tests/robustness/test-concurrent-state-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for concurrent state access # diff --git a/tests/robustness/test-git-operations-robustness.sh b/tests/robustness/test-git-operations-robustness.sh index d409c0ed..a80114c8 100755 --- a/tests/robustness/test-git-operations-robustness.sh +++ b/tests/robustness/test-git-operations-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for git operation scripts # diff --git a/tests/robustness/test-goal-tracker-robustness.sh b/tests/robustness/test-goal-tracker-robustness.sh index fe4c025b..0a9bc358 100755 --- a/tests/robustness/test-goal-tracker-robustness.sh +++ b/tests/robustness/test-goal-tracker-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for goal tracker parsing # diff --git a/tests/robustness/test-hook-input-robustness.sh b/tests/robustness/test-hook-input-robustness.sh index 6a6c77dd..675ff887 100755 --- a/tests/robustness/test-hook-input-robustness.sh +++ b/tests/robustness/test-hook-input-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for hook input parsing and monitor edge cases # @@ -453,7 +453,7 @@ cd "$MONITOR_TEST_DIR/project" # Create monitor runner script cat > "$MONITOR_TEST_DIR/run_monitor.sh" << 'MONITOR_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" @@ -512,7 +512,7 @@ echo "Test log" > "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED/2026-01-17_10-0 # Create narrow terminal runner - calls _humanize_monitor_codex directly in same shell cat > "$MONITOR_TEST_DIR/run_narrow.sh" << 'NARROW_EOF' -#!/bin/bash +#!/usr/bin/env 
bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" @@ -603,7 +603,7 @@ mkdir -p "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED3/2026-01-17_11-00-00" printf '\033[31mRed text\033[0m\n\033[1;32mBold green\033[0m\n' > "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED3/2026-01-17_11-00-00/round-1-codex-run.log" cat > "$MONITOR_TEST_DIR/run_ansi.sh" << 'ANSI_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 8f302bb4..bfccf7f0 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for all hook scripts # @@ -592,7 +592,7 @@ cd - > /dev/null # Create mock codex to avoid real API calls (review_started: false triggers codex exec) mkdir -p "$TEST_DIR/mock-bin" cat > "$TEST_DIR/mock-bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex that returns review output indicating work continues echo "Review: Code looks good but more testing needed." echo "No COMPLETE or STOP markers - work should continue." 
diff --git a/tests/robustness/test-path-validation-robustness.sh b/tests/robustness/test-path-validation-robustness.sh index be7a916e..56b98528 100755 --- a/tests/robustness/test-path-validation-robustness.sh +++ b/tests/robustness/test-path-validation-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for path validation # @@ -22,7 +22,7 @@ setup_test_dir setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-path-validation-robustness.sh echo "Mock codex output" exit 0 diff --git a/tests/robustness/test-plan-file-robustness.sh b/tests/robustness/test-plan-file-robustness.sh index 7ee5f186..d2f5ee7f 100755 --- a/tests/robustness/test-plan-file-robustness.sh +++ b/tests/robustness/test-plan-file-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for plan file validation # @@ -27,7 +27,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-robustness.sh echo "Mock codex output" exit 0 diff --git a/tests/robustness/test-pr-loop-api-fetch.sh b/tests/robustness/test-pr-loop-api-fetch.sh index e549be08..489a1cee 100755 --- a/tests/robustness/test-pr-loop-api-fetch.sh +++ b/tests/robustness/test-pr-loop-api-fetch.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop API fetch/state tests (parallel split 1/2) # diff --git a/tests/robustness/test-pr-loop-api-poll.sh b/tests/robustness/test-pr-loop-api-poll.sh index 234130a6..f56d5594 100755 --- a/tests/robustness/test-pr-loop-api-poll.sh +++ b/tests/robustness/test-pr-loop-api-poll.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop API poll/stop-hook tests (parallel split 2/2) # diff --git a/tests/robustness/test-pr-loop-api-robustness.sh b/tests/robustness/test-pr-loop-api-robustness.sh index 20b5c165..d8e5097a 
100755 --- a/tests/robustness/test-pr-loop-api-robustness.sh +++ b/tests/robustness/test-pr-loop-api-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for PR loop API handling # @@ -41,7 +41,7 @@ create_mock_gh() { # fetch-pr-comments.sh uses: gh repo view --json owner,name -q '...' # gh pr view PR --repo REPO --json number -q .number cat > "$dir/bin/gh" << 'GHEOF_START' -#!/bin/bash +#!/usr/bin/env bash # Mock gh command for testing # Check for -q flag anywhere in args (jq query) @@ -694,7 +694,7 @@ run_poll_tests() { # Create a mock gh that sleeps briefly but responds mkdir -p "$TEST_DIR/poll2/bin" cat > "$TEST_DIR/poll2/bin/gh" << 'GHEOF' -#!/bin/bash +#!/usr/bin/env bash # Handle repo view if [[ "$1" == "repo" && "$2" == "view" ]]; then if [[ "$*" == *"--json"* ]]; then @@ -761,7 +761,7 @@ GHEOF # Create a mock gh that fails on API calls mkdir -p "$TEST_DIR/poll3/bin" cat > "$TEST_DIR/poll3/bin/gh" << 'GHEOF' -#!/bin/bash +#!/usr/bin/env bash # Check for -q flag anywhere in args (jq query) HAS_Q_FLAG=false for arg in "$@"; do diff --git a/tests/robustness/test-session-robustness.sh b/tests/robustness/test-session-robustness.sh index 7b232e9f..4bc636af 100755 --- a/tests/robustness/test-session-robustness.sh +++ b/tests/robustness/test-session-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for concurrent session handling # diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index 13fe21bc..94fecad2 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for setup scripts # @@ -62,6 +62,23 @@ init_basic_git_repo() { cd - > /dev/null } +# Create a minimal PATH toolset in a test bin directory so scripts using +# '/usr/bin/env bash' still run even in restricted PATH scenarios. 
+prepare_runtime_bin() { + local bin_dir="$1" + local tool + local tool_path + + mkdir -p "$bin_dir" + + for tool in bash env git dirname cat sed awk grep mkdir date head od tr wc sort ls rm cp mv chmod ln readlink printf timeout gtimeout; do + tool_path=$(command -v "$tool" 2>/dev/null || true) + if [[ -n "$tool_path" && -x "$tool_path" && ! -e "$bin_dir/$tool" ]]; then + ln -s "$tool_path" "$bin_dir/$tool" + fi + done +} + # Run setup-rlcr-loop.sh with proper isolation from real RLCR loop # Usage: run_rlcr_setup [args...] run_rlcr_setup() { @@ -720,7 +737,7 @@ init_basic_git_repo "$TEST_DIR/repo30" # Create mock gh that fails auth check (to test dependency handling) mkdir -p "$TEST_DIR/repo30/bin" cat > "$TEST_DIR/repo30/bin/gh" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash if [[ "$1" == "auth" && "$2" == "status" ]]; then echo "Not logged in" >&2 exit 1 @@ -816,7 +833,7 @@ REAL_GIT=$(command -v git) # Mock timeout that returns 124 for git rev-parse (first check in setup script) cat > "$TEST_DIR/repo34/bin/timeout" << TIMEOUTEOF -#!/bin/bash +#!/usr/bin/env bash # Mock timeout that returns 124 for git rev-parse to simulate timeout if [[ "\$*" == *"git"*"rev-parse"* ]]; then exit 124 @@ -833,7 +850,7 @@ chmod +x "$TEST_DIR/repo34/bin/gtimeout" # Create mock codex cat > "$TEST_DIR/repo34/bin/codex" << 'CODEXEOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 CODEXEOF chmod +x "$TEST_DIR/repo34/bin/codex" @@ -1092,13 +1109,14 @@ git -C "$TEST_DIR/repo46" add .gitignore && git -C "$TEST_DIR/repo46" commit -q # Create bin dir with jq but no codex mkdir -p "$TEST_DIR/repo46/bin" +prepare_runtime_bin "$TEST_DIR/repo46/bin" cat > "$TEST_DIR/repo46/bin/jq" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$TEST_DIR/repo46/bin/jq" -# Hide system codex by making the only codex on PATH our empty bin dir -OUTPUT=$(PATH="$TEST_DIR/repo46/bin:/usr/bin:/bin" run_rlcr_setup "$TEST_DIR/repo46" plan.md 2>&1) || EXIT_CODE=$? 
+# Hide system codex by making the only codex on PATH our test bin dir +OUTPUT=$(PATH="$TEST_DIR/repo46/bin" run_rlcr_setup "$TEST_DIR/repo46" plan.md 2>&1) || EXIT_CODE=$? EXIT_CODE=${EXIT_CODE:-0} if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "Missing required dependencies" && echo "$OUTPUT" | grep -q "codex"; then pass "Missing codex detected in dependency check" @@ -1121,13 +1139,14 @@ git -C "$TEST_DIR/repo47" add .gitignore && git -C "$TEST_DIR/repo47" commit -q # Create bin dir with codex but no jq mkdir -p "$TEST_DIR/repo47/bin" +prepare_runtime_bin "$TEST_DIR/repo47/bin" cat > "$TEST_DIR/repo47/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$TEST_DIR/repo47/bin/codex" -# Use a restricted PATH that has git but no jq -OUTPUT=$(PATH="$TEST_DIR/repo47/bin:/usr/bin:/bin" run_rlcr_setup "$TEST_DIR/repo47" plan.md 2>&1) || EXIT_CODE=$? +# Use a restricted PATH with required runtime tools but no jq +OUTPUT=$(PATH="$TEST_DIR/repo47/bin" run_rlcr_setup "$TEST_DIR/repo47" plan.md 2>&1) || EXIT_CODE=$? 
EXIT_CODE=${EXIT_CODE:-0} if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "Missing required dependencies" && echo "$OUTPUT" | grep -q "jq"; then pass "Missing jq detected in dependency check" diff --git a/tests/robustness/test-state-file-robustness.sh b/tests/robustness/test-state-file-robustness.sh index b16171f9..9865a9ee 100755 --- a/tests/robustness/test-state-file-robustness.sh +++ b/tests/robustness/test-state-file-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for state file parsing # diff --git a/tests/robustness/test-state-transition-robustness.sh b/tests/robustness/test-state-transition-robustness.sh index 96eeb7da..6f6bdc62 100755 --- a/tests/robustness/test-state-transition-robustness.sh +++ b/tests/robustness/test-state-transition-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for state transition logic # diff --git a/tests/robustness/test-template-error-robustness.sh b/tests/robustness/test-template-error-robustness.sh index ccb3faac..7d9ae47a 100755 --- a/tests/robustness/test-template-error-robustness.sh +++ b/tests/robustness/test-template-error-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for template system error handling # diff --git a/tests/robustness/test-template-stress-robustness.sh b/tests/robustness/test-template-stress-robustness.sh index b599903b..0719c4ef 100755 --- a/tests/robustness/test-template-stress-robustness.sh +++ b/tests/robustness/test-template-stress-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for template system stress conditions # diff --git a/tests/robustness/test-timeout-robustness.sh b/tests/robustness/test-timeout-robustness.sh index 7d9276c9..03de1b92 100755 --- a/tests/robustness/test-timeout-robustness.sh +++ b/tests/robustness/test-timeout-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for timeout implementation # 
diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index d5cb5caf..1afafbee 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Run all test suites for the Humanize plugin (parallel execution) # @@ -98,7 +98,7 @@ trap "rm -rf $OUTPUT_DIR" EXIT if ! command -v codex &>/dev/null; then mkdir -p "$OUTPUT_DIR/mock-bin" cat > "$OUTPUT_DIR/mock-bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_CODEX chmod +x "$OUTPUT_DIR/mock-bin/codex" diff --git a/tests/setup-fixture-mock-gh.sh b/tests/setup-fixture-mock-gh.sh index eac3a8e3..f522bf58 100755 --- a/tests/setup-fixture-mock-gh.sh +++ b/tests/setup-fixture-mock-gh.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Create a mock gh CLI that returns fixture data for testing # fetch-pr-comments.sh and poll-pr-reviews.sh @@ -25,7 +25,7 @@ mkdir -p "$MOCK_BIN_DIR" # Create mock gh that returns fixtures cat > "$MOCK_BIN_DIR/gh" << MOCK_GH_EOF -#!/bin/bash +#!/usr/bin/env bash # Fixture-backed mock gh CLI for testing fetch/poll scripts FIXTURES_DIR="$FIXTURES_DIR" diff --git a/tests/setup-monitor-test-env.sh b/tests/setup-monitor-test-env.sh index 14673c6c..92fa9431 100755 --- a/tests/setup-monitor-test-env.sh +++ b/tests/setup-monitor-test-env.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Helper script to set up monitor test environment # This script creates the necessary directory structure and state files diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 33c23df3..b55bc97b 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for --agent-teams feature in RLCR loop # @@ -498,7 +498,7 @@ setup_mock_codex_impl_feedback() { local feedback="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF -#!/bin/bash +#!/usr/bin/env bash if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' $feedback @@ -516,7 +516,7 @@ 
setup_mock_codex_review_issues() { local review_output="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF -#!/bin/bash +#!/usr/bin/env bash if [[ "\$1" == "exec" ]]; then echo "Should not be called in review phase" elif [[ "\$1" == "review" ]]; then diff --git a/tests/test-allowlist-validators.sh b/tests/test-allowlist-validators.sh index 6c604965..53255928 100755 --- a/tests/test-allowlist-validators.sh +++ b/tests/test-allowlist-validators.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for allowlist behavior in RLCR loop validators # diff --git a/tests/test-ansi-parsing.sh b/tests/test-ansi-parsing.sh index 4394a8d7..8f70847f 100755 --- a/tests/test-ansi-parsing.sh +++ b/tests/test-ansi-parsing.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test ANSI escape code handling in test runner output parsing # diff --git a/tests/test-ask-codex.sh b/tests/test-ask-codex.sh index 17405f69..319ab383 100755 --- a/tests/test-ask-codex.sh +++ b/tests/test-ask-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for ask-codex.sh - one-shot consultation with mock Codex # @@ -36,7 +36,7 @@ MOCK_BIN_DIR="$TEST_DIR/mock-bin" mkdir -p "$MOCK_BIN_DIR" cat > "$MOCK_BIN_DIR/codex" << 'MOCK_EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex binary for testing ask-codex.sh # Controlled via environment variables. 
if [[ -n "${MOCK_CODEX_STDERR:-}" ]]; then diff --git a/tests/test-bash-validator-patterns.sh b/tests/test-bash-validator-patterns.sh index 62a491f7..bd7f07f4 100755 --- a/tests/test-bash-validator-patterns.sh +++ b/tests/test-bash-validator-patterns.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for command_modifies_file function in loop-common.sh # diff --git a/tests/test-cancel-signal-file.sh b/tests/test-cancel-signal-file.sh index 4d23eef3..9eb78d56 100755 --- a/tests/test-cancel-signal-file.sh +++ b/tests/test-cancel-signal-file.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for cancel-rlcr-loop signal file mechanism # diff --git a/tests/test-codex-review-merge.sh b/tests/test-codex-review-merge.sh index 8761bde7..ed1082bf 100755 --- a/tests/test-codex-review-merge.sh +++ b/tests/test-codex-review-merge.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for Code Review log file analysis behavior # diff --git a/tests/test-error-scenarios.sh b/tests/test-error-scenarios.sh index c05c1d8a..65930b1b 100755 --- a/tests/test-error-scenarios.sh +++ b/tests/test-error-scenarios.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test error scenarios for template-loader.sh # diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index f33fe1c1..4efc4220 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for Finalize Phase feature # @@ -55,7 +55,7 @@ setup_mock_codex() { local review_output="${2:-No issues found.}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - outputs the provided content if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' @@ -79,7 +79,7 @@ setup_mock_codex_with_tracking() { local review_output="${2:-No issues found.}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Track that codex was 
called echo "CODEX_WAS_CALLED" > "$TEST_DIR/codex_called.marker" if [[ "\$1" == "exec" ]]; then @@ -104,7 +104,7 @@ setup_mock_codex_review_failure() { local review_exit_code="${2:-1}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - fails on review command if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' @@ -126,7 +126,7 @@ setup_mock_codex_review_empty_stdout() { local exec_output="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - produces empty stdout on review if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' diff --git a/tests/test-gen-plan.sh b/tests/test-gen-plan.sh index 51abab50..66e346fa 100755 --- a/tests/test-gen-plan.sh +++ b/tests/test-gen-plan.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for gen-plan command structure validation # diff --git a/tests/test-helpers.sh b/tests/test-helpers.sh index a93a2329..2fafff3b 100644 --- a/tests/test-helpers.sh +++ b/tests/test-helpers.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Shared test helper functions for all test scripts # diff --git a/tests/test-humanize-escape.sh b/tests/test-humanize-escape.sh index 8731cc98..644a349f 100755 --- a/tests/test-humanize-escape.sh +++ b/tests/test-humanize-escape.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for humanize-escape fixes # diff --git a/tests/test-monitor-e2e-deletion.sh b/tests/test-monitor-e2e-deletion.sh index bbca2c6d..afd738e6 100755 --- a/tests/test-monitor-e2e-deletion.sh +++ b/tests/test-monitor-e2e-deletion.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Monitor e2e deletion tests (parallel split 1/3) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tests/test-monitor-e2e-real.sh b/tests/test-monitor-e2e-real.sh index f0c8d0b0..1ef8d409 100755 --- a/tests/test-monitor-e2e-real.sh +++ b/tests/test-monitor-e2e-real.sh @@ -1,4 +1,4 @@ 
-#!/bin/bash +#!/usr/bin/env bash # # TRUE End-to-End Monitor Tests for monitor tests # @@ -105,7 +105,7 @@ GOALTRACKER_EOF1 # Create the test runner script # This script runs the REAL _humanize_monitor_codex function cat > "$TEST_PROJECT/run_real_monitor.sh" << 'MONITOR_SCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_codex function PROJECT_DIR="$1" @@ -426,7 +426,7 @@ GOALTRACKER_SIGINT # Create the test runner script for SIGINT test cat > "$TEST_PROJECT_SIGINT/run_real_monitor_sigint.sh" << 'SIGINT_SCRIPT_EOF' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_codex function for SIGINT testing PROJECT_DIR="$1" @@ -747,7 +747,7 @@ GOALTRACKER_EOF # Create bash test runner script for PR monitor cat > "$TEST_PROJECT_PR/run_real_monitor_pr.sh" << 'MONITOR_SCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_pr function PROJECT_DIR="$1" @@ -890,7 +890,7 @@ PR_GOAL_EOF # Create bash test runner script for PR monitor without --once cat > "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" << 'PR_NO_ONCE_EOF' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_pr function WITHOUT --once flag PROJECT_DIR="$1" diff --git a/tests/test-monitor-e2e-sigint.sh b/tests/test-monitor-e2e-sigint.sh index 9a354a2b..a6bfe20a 100755 --- a/tests/test-monitor-e2e-sigint.sh +++ b/tests/test-monitor-e2e-sigint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Monitor e2e SIGINT tests (parallel split 2/3) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tests/test-monitor-runtime.sh b/tests/test-monitor-runtime.sh index f73256c0..e146adaf 100755 --- a/tests/test-monitor-runtime.sh +++ b/tests/test-monitor-runtime.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Runtime Verification Tests for tests # @@ -63,7 +63,7 @@ echo "current_round: 1" > .humanize/rlcr/2026-01-16_10-00-00/state.md # Create a test script that sources humanize.sh and tests the 
graceful stop behavior cat > test_graceful_stop.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cd "$1" # Source the monitor script @@ -141,7 +141,7 @@ echo "Test 2: Verify cleanup prevents double execution" echo "" cat > test_double_cleanup.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cleanup_done=false call_count=0 @@ -184,7 +184,7 @@ echo "Test 3: Main loop directory deletion detection" echo "" cat > test_loop_detection.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cd "$1" loop_dir=".humanize/rlcr" @@ -261,7 +261,7 @@ echo "" # and would reset the scroll region cat > test_terminal_restore.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Test that _restore_terminal is defined and callable cd "$1" @@ -331,7 +331,7 @@ echo "Test 6: SIGINT triggers cleanup in bash" echo "" cat > test_sigint_bash.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Test that SIGINT triggers cleanup in bash mode cleanup_done=false diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index d2e8af6f..00bd7138 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for plan file hooks during RLCR loop # @@ -40,7 +40,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-hooks.sh if [[ "$1" == "exec" ]]; then echo "Mock review output" diff --git a/tests/test-plan-file-validation.sh b/tests/test-plan-file-validation.sh index 411c71f9..1fb10553 100755 --- a/tests/test-plan-file-validation.sh +++ b/tests/test-plan-file-validation.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for plan file validation in setup-rlcr-loop.sh # @@ -74,7 +74,7 @@ EOF mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-validation.sh echo "mock codex" 
EOF diff --git a/tests/test-pr-loop-1-scripts.sh b/tests/test-pr-loop-1-scripts.sh index a4088b71..38ccd846 100755 --- a/tests/test-pr-loop-1-scripts.sh +++ b/tests/test-pr-loop-1-scripts.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Script Tests Runner (parallel split 1/3) # diff --git a/tests/test-pr-loop-2-hooks.sh b/tests/test-pr-loop-2-hooks.sh index 254bdbdb..56f6219e 100755 --- a/tests/test-pr-loop-2-hooks.sh +++ b/tests/test-pr-loop-2-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Hook Tests Runner (parallel split 2/3) # diff --git a/tests/test-pr-loop-3-stophook.sh b/tests/test-pr-loop-3-stophook.sh index a17c27a8..6a9149f5 100755 --- a/tests/test-pr-loop-3-stophook.sh +++ b/tests/test-pr-loop-3-stophook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Stop Hook Tests Runner (parallel split 3/3) # diff --git a/tests/test-pr-loop-hooks.sh b/tests/test-pr-loop-hooks.sh index 1e8c6ca1..de4d09f7 100644 --- a/tests/test-pr-loop-hooks.sh +++ b/tests/test-pr-loop-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Hook Tests # @@ -482,7 +482,7 @@ create_enhanced_mock_gh() { local trigger_timestamp="${3:-2026-01-18T12:00:00Z}" cat > "$mock_dir/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash # Enhanced mock gh CLI for stop hook testing case "\$1" in @@ -542,7 +542,7 @@ test_trigger_user_filter() { # Create mock that returns comments from different users cat > "$test_subdir/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -794,7 +794,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -816,7 +816,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; @@ -877,7 +877,7 @@ EOF # Mock gh that properly returns jq-parsed user and 
trigger comments cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -916,7 +916,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; @@ -979,7 +979,7 @@ EOF # Mock gh that simulates paginated response (returns multiple JSON arrays) # The trigger comment is on page 2 (second array) - only visible if pagination works cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -1023,7 +1023,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; @@ -1084,7 +1084,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -1112,7 +1112,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; diff --git a/tests/test-pr-loop-lib.sh b/tests/test-pr-loop-lib.sh index 3d7693cb..a619e052 100644 --- a/tests/test-pr-loop-lib.sh +++ b/tests/test-pr-loop-lib.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Common library for PR loop tests # @@ -30,7 +30,7 @@ if [[ -z "${TEST_PR_LOOP_LIB_LOADED:-}" ]]; then mkdir -p "$mock_dir" cat > "$mock_dir/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Mock gh CLI for testing case "$1" in @@ -92,7 +92,7 @@ MOCK_GH local mock_dir="$1" cat > "$mock_dir/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash # Mock codex CLI for testing echo "Mock codex output" exit 0 diff --git a/tests/test-pr-loop-scripts.sh b/tests/test-pr-loop-scripts.sh index b1ce5a42..d77b9067 100644 --- a/tests/test-pr-loop-scripts.sh +++ b/tests/test-pr-loop-scripts.sh @@ -1,4 +1,4 @@ -#!/bin/bash 
+#!/usr/bin/env bash # # PR Loop Script Tests # diff --git a/tests/test-pr-loop-stophook.sh b/tests/test-pr-loop-stophook.sh index 1e71dcdf..a73f8a4b 100644 --- a/tests/test-pr-loop-stophook.sh +++ b/tests/test-pr-loop-stophook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Stop Hook Tests # @@ -56,7 +56,7 @@ EOF # Mock gh that returns OLD trigger comment (BEFORE latest_commit_at) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Check if --jq is in arguments (for transformed format) HAS_JQ=false for arg in "$@"; do @@ -103,7 +103,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -175,7 +175,7 @@ EOF # Mock gh that returns no trigger comments, but has codex +1 cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -206,7 +206,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -281,13 +281,13 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -363,7 +363,7 @@ EOF # Mock gh that returns bot comments (simulating comments arriving) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -411,7 +411,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -485,14 +485,14 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_GH chmod +x "$mock_bin/gh" # Mock git that reports unpushed commits cat > "$mock_bin/git" << 
'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -571,7 +571,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in pr) if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then @@ -595,7 +595,7 @@ MOCK_GH # Mock git that simulates force push: old commit is NOT ancestor of current HEAD cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -676,7 +676,7 @@ EOF # Mock gh that returns no trigger comments cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -711,7 +711,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -788,7 +788,7 @@ EOF # Mock gh that returns NO bot comments (simulates bot not responding) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -820,7 +820,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -930,7 +930,7 @@ EOF # Mock gh that returns +1 reaction from codex cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -966,7 +966,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1045,7 +1045,7 @@ EOF # Mock gh that returns NO eyes reaction (simulates claude bot not configured) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Check if --jq is in arguments (for transformed format) HAS_JQ=false for arg in "$@"; do @@ -1101,7 +1101,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash 
+#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1193,7 +1193,7 @@ EOF # check-pr-reviewer-status.sh uses --jq so needs transformed format # Use COMMENT_TS environment variable for dynamic timestamp cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash # Dynamic comment timestamp from test setup COMMENT_TS="$comment_ts" COMMIT_TS="$commit_ts" @@ -1307,7 +1307,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1425,7 +1425,7 @@ EOF # - Current repo (fork) doesn't have PR 456 # - Parent repo (upstream) has PR 456 cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Track which repo we're querying FORK_REPO="forkuser/forkrepo" UPSTREAM_REPO="upstreamowner/upstreamrepo" @@ -1475,7 +1475,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1583,7 +1583,7 @@ EOF # - claude: APPROVE (LGTM) # - codex: ISSUES (has issues) cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash # Dynamic timestamps from test setup CLAUDE_TS="$claude_ts" CODEX_TS="$codex_ts" @@ -1653,7 +1653,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1677,7 +1677,7 @@ MOCK_GIT # Mock codex that outputs mixed approval cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash # Mock codex output: claude approves, codex has issues cat << 'CODEX_OUTPUT' # PR Review Validation diff --git a/tests/test-pr-loop-system.sh b/tests/test-pr-loop-system.sh index e124cb6f..05cf3b87 100755 --- a/tests/test-pr-loop-system.sh +++ b/tests/test-pr-loop-system.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test runner for PR loop system # @@ -910,7 +910,7 @@ run_monitor_once_capture_output() { 
# Create wrapper script that runs monitor and captures output local wrapper="$project_dir/run_monitor_test.sh" cat > "$wrapper" << 'WRAPPER_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" @@ -1597,7 +1597,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash COMMENT_TS="$comment_ts" COMMIT_TS="$commit_ts" @@ -1669,7 +1669,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1691,7 +1691,7 @@ MOCK_GIT # Mock codex command - returns ISSUES_REMAINING to trigger goal tracker update cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for testing - output review analysis cat << 'CODEX_OUTPUT' ## Bot Review Analysis diff --git a/tests/test-pr-loop.sh b/tests/test-pr-loop.sh index 54af3829..0bb615b8 100755 --- a/tests/test-pr-loop.sh +++ b/tests/test-pr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for PR loop feature # diff --git a/tests/test-session-id.sh b/tests/test-session-id.sh index fa28b9de..0d2656a0 100755 --- a/tests/test-session-id.sh +++ b/tests/test-session-id.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for session_id feature in RLCR loop # diff --git a/tests/test-skill-monitor.sh b/tests/test-skill-monitor.sh index f50babdf..3ccebce5 100755 --- a/tests/test-skill-monitor.sh +++ b/tests/test-skill-monitor.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for _humanize_monitor_skill (humanize monitor skill) # diff --git a/tests/test-state-exit-naming.sh b/tests/test-state-exit-naming.sh index d48c8a95..7982baac 100755 --- a/tests/test-state-exit-naming.sh +++ b/tests/test-state-exit-naming.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for state.md rename on exit # diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 7682c853..d3238c49 100755 --- 
a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for rlcr-stop-gate wrapper project root detection # diff --git a/tests/test-template-loader.sh b/tests/test-template-loader.sh index 74bbd784..e9d48639 100755 --- a/tests/test-template-loader.sh +++ b/tests/test-template-loader.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for template-loader.sh # diff --git a/tests/test-template-references.sh b/tests/test-template-references.sh index a302ad19..13d4c31b 100755 --- a/tests/test-template-references.sh +++ b/tests/test-template-references.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Template Reference Validation # diff --git a/tests/test-templates-comprehensive.sh b/tests/test-templates-comprehensive.sh index 0e7e8f3c..bf8c38af 100755 --- a/tests/test-templates-comprehensive.sh +++ b/tests/test-templates-comprehensive.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Comprehensive template validation tests for CI/CD # diff --git a/tests/test-todo-checker.sh b/tests/test-todo-checker.sh index b3e7b072..8588bf3b 100755 --- a/tests/test-todo-checker.sh +++ b/tests/test-todo-checker.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for check-todos-from-transcript.py # From 43032491dcb2c35fc9179f99a3fe446758ba857e Mon Sep 17 00:00:00 2001 From: Qiming Chu Date: Tue, 17 Mar 2026 22:34:35 +0800 Subject: [PATCH 21/97] Bump version to 1.14.1 for branch CI validation Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d433af17..af4c0ce6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to 
review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0" + "version": "1.14.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index d53f704d..8a69a6c7 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0", + "version": "1.14.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dac0f88f..d864489a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.14.0** +**Current Version: 1.14.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From 4deb1db636e01586c4e687034e93635999e184e1 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Wed, 18 Mar 2026 21:48:29 +0800 Subject: [PATCH 22/97] Harden template-loader error handling and reduce code duplication - Remove spurious blank stderr line from load_template on missing files - Add awk exit code propagation in render_template to surface failures - Extract _emit_fallback helper to deduplicate load_and_render_safe - Prevent append_template from appending empty content on missing templates - Extend validate_template_dir to check all required subdirs (plan, pr-loop) - Bump version to 1.14.1 Signed-off-by: Chao Liu --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- hooks/lib/template-loader.sh | 58 +++++++++++++++++++++------------ 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d433af17..af4c0ce6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0" + "version": "1.14.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index d53f704d..8a69a6c7 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0", + "version": "1.14.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dac0f88f..d864489a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.14.0** +**Current Version: 1.14.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7f46853d..028a66b3 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -41,9 +41,7 @@ load_template() { if [[ -f "$template_path" ]]; then cat "$template_path" else - echo "" >&2 echo "Warning: Template not found: $template_path" >&2 - echo "" fi } @@ -71,6 +69,7 @@ render_template() { # Single-pass replacement using awk # Scans for {{VAR}} patterns and replaces them with values from environment # Replaced content goes directly to output without re-scanning + local awk_exit=0 content=$(env "${env_vars[@]}" awk ' BEGIN { # Build lookup table from environment variables with TMPL_VAR_ prefix @@ -126,7 +125,12 @@ render_template() { } print result - }' <<< "$content") + }' <<< "$content") || awk_exit=$? + + if [[ $awk_exit -ne 0 ]]; then + echo "Error: Template rendering failed (awk exit code: $awk_exit)" >&2 + return 1 + fi echo "$content" } @@ -148,22 +152,36 @@ load_and_render() { # Append content from another template file # Usage: append_template "$base_content" "$TEMPLATE_DIR" "claude/post-alignment.md" +# Only appends if the template exists and is non-empty. 
append_template() { local base_content="$1" local template_dir="$2" local template_name="$3" local additional_content - additional_content=$(load_template "$template_dir" "$template_name") + additional_content=$(load_template "$template_dir" "$template_name" 2>/dev/null) || true echo "$base_content" - echo "$additional_content" + if [[ -n "$additional_content" ]]; then + echo "$additional_content" + fi } # ======================================== # Safe versions with fallback messages # ======================================== +# Emit a fallback message, optionally rendering template variables. +_emit_fallback() { + local fallback_msg="$1" + shift + if [[ $# -gt 0 ]]; then + render_template "$fallback_msg" "$@" + else + echo "$fallback_msg" + fi +} + # Load and render with a fallback message if template fails # Usage: load_and_render_safe "$TEMPLATE_DIR" "block/message.md" "fallback message" "VAR=value" ... # Returns fallback message if template is missing or empty @@ -174,28 +192,18 @@ load_and_render_safe() { shift 3 local content - content=$(load_template "$template_dir" "$template_name" 2>/dev/null) + content=$(load_template "$template_dir" "$template_name" 2>/dev/null) || true if [[ -z "$content" ]]; then - # Template missing - use fallback with variable substitution - if [[ $# -gt 0 ]]; then - render_template "$fallback_msg" "$@" - else - echo "$fallback_msg" - fi + _emit_fallback "$fallback_msg" "$@" return fi local result - result=$(render_template "$content" "$@") + result=$(render_template "$content" "$@") || true if [[ -z "$result" ]]; then - # Rendering produced empty result - use fallback - if [[ $# -gt 0 ]]; then - render_template "$fallback_msg" "$@" - else - echo "$fallback_msg" - fi + _emit_fallback "$fallback_msg" "$@" return fi @@ -213,8 +221,16 @@ validate_template_dir() { return 1 fi - if [[ ! -d "$template_dir/block" ]] || [[ ! -d "$template_dir/codex" ]] || [[ ! 
-d "$template_dir/claude" ]]; then - echo "ERROR: Template directory missing subdirectories: $template_dir" >&2 + local required_subdirs=("block" "codex" "claude" "plan" "pr-loop") + local missing=() + local subdir + for subdir in "${required_subdirs[@]}"; do + if [[ ! -d "$template_dir/$subdir" ]]; then + missing+=("$subdir") + fi + done + if [[ ${#missing[@]} -gt 0 ]]; then + echo "ERROR: Template directory missing subdirectories (${missing[*]}): $template_dir" >&2 return 1 fi From 084464851a2c0a6c0e56358e49d14a1bd6c53938 Mon Sep 17 00:00:00 2001 From: Zhou Yaoyang Date: Sun, 15 Mar 2026 17:04:31 +0800 Subject: [PATCH 23/97] Add native Codex hook support and harden install/test flows - add native Codex hook config and installer, wire RLCR/PR stop hooks, and document Codex setup and usage - fix BitLesson selector routing and update related skills/docs - simplify install-skill target handling and harden RLCR test mock argument parsing --- .gitignore | 2 + config/codex-hooks.json | 23 ++ docs/bitlesson.md | 4 + docs/install-for-codex.md | 33 ++- docs/usage.md | 5 + hooks/loop-codex-stop-hook.sh | 11 +- hooks/pr-loop-stop-hook.sh | 5 +- scripts/bitlesson-select.sh | 77 +++-- scripts/install-codex-hooks.sh | 197 +++++++++++++ scripts/install-skill.sh | 226 ++++++++++++++- scripts/rlcr-stop-gate.sh | 7 + skills/humanize-rlcr/SKILL.md | 40 +-- skills/humanize/SKILL.md | 9 +- tests/run-all-tests.sh | 1 + tests/test-agent-teams.sh | 22 +- tests/test-bitlesson-select-routing.sh | 131 ++++++++- tests/test-codex-hook-install.sh | 340 +++++++++++++++++++++++ tests/test-disable-nested-codex-hooks.sh | 213 ++++++++++++++ tests/test-finalize-phase.sh | 44 ++- tests/test-task-tag-routing.sh | 13 +- 20 files changed, 1301 insertions(+), 102 deletions(-) create mode 100644 config/codex-hooks.json create mode 100755 scripts/install-codex-hooks.sh create mode 100755 tests/test-codex-hook-install.sh create mode 100644 tests/test-disable-nested-codex-hooks.sh diff --git a/.gitignore 
b/.gitignore index 8ef0d573..2308545e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ temp # Humanize state directories (runtime-generated, project-local) .humanize/ +.claude-flow/ +.swarm/ # Python cache __pycache__/ diff --git a/config/codex-hooks.json b/config/codex-hooks.json new file mode 100644 index 00000000..7a04402a --- /dev/null +++ b/config/codex-hooks.json @@ -0,0 +1,23 @@ +{ + "description": "Humanize Codex Hooks - Native Stop hooks for RLCR and PR loops", + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/loop-codex-stop-hook.sh", + "timeout": 7200, + "statusMessage": "humanize RLCR stop hook" + }, + { + "type": "command", + "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/pr-loop-stop-hook.sh", + "timeout": 7200, + "statusMessage": "humanize PR stop hook" + } + ] + } + ] + } +} diff --git a/docs/bitlesson.md b/docs/bitlesson.md index bb2c3bac..01bb32e5 100644 --- a/docs/bitlesson.md +++ b/docs/bitlesson.md @@ -18,6 +18,10 @@ Provider routing is automatic: If the configured provider binary is missing, the selector falls back to the default Codex model so the loop can still proceed. +On Codex-only installs, Humanize writes `provider_mode: "codex-only"` into the user config. +When that mode is present, the selector forces BitLesson selection onto the Codex/OpenAI path +before provider resolution, even if an older default such as `haiku` would otherwise route to Claude. + ## Workflow Each project keeps its BitLesson knowledge base at `.humanize/bitlesson.md`. diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index a0c5dac2..8698d001 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -1,6 +1,6 @@ # Install Humanize Skills for Codex -This guide explains how to install the Humanize skills for Codex skill runtime (`$CODEX_HOME/skills`). 
+This guide explains how to install Humanize for Codex CLI, including the skill runtime (`$CODEX_HOME/skills`) and the native Codex `Stop` hook (`$CODEX_HOME/hooks.json`). ## Quick Install (Recommended) @@ -25,8 +25,14 @@ Or use the unified installer directly: This will: - Sync `humanize`, `humanize-gen-plan`, `humanize-refine-plan`, and `humanize-rlcr` into `${CODEX_HOME:-~/.codex}/skills` - Copy runtime dependencies into `${CODEX_HOME:-~/.codex}/skills/humanize` +- Install/update native Humanize Stop hooks in `${CODEX_HOME:-~/.codex}/hooks.json` +- Enable the experimental `codex_hooks` feature in `${CODEX_HOME:-~/.codex}/config.toml` when `codex` is available +- Seed `~/.config/humanize/config.json` with a Codex/OpenAI `bitlesson_model` when that key is not already set +- Mark the install as `provider_mode: "codex-only"` when using `--target codex` - Use RLCR defaults: `codex exec` with `gpt-5.4:high`, `codex review` with `gpt-5.4:high` +Requires Codex CLI `0.114.0` or newer for native hooks. Older Codex builds are not supported by the Codex install path. 
+ ## Verify ```bash @@ -58,6 +64,21 @@ Installed files/directories: - `${CODEX_HOME:-~/.codex}/skills/humanize/templates/` - `${CODEX_HOME:-~/.codex}/skills/humanize/config/` - `${CODEX_HOME:-~/.codex}/skills/humanize/agents/` +- `${CODEX_HOME:-~/.codex}/hooks.json` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` (created or updated only when Humanize config keys are unset) + +Verify native hooks: + +```bash +codex features list | rg codex_hooks +sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" +``` + +Expected: +- `codex_hooks` is `true` +- `hooks.json` contains `loop-codex-stop-hook.sh` and `pr-loop-stop-hook.sh` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.4` +- for `--target codex`, `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` also contains `provider_mode: "codex-only"` ## Optional: Install for Both Codex and Kimi @@ -73,6 +94,9 @@ Installed files/directories: # Custom Codex skills dir ./scripts/install-skills-codex.sh --codex-skills-dir /custom/codex/skills + +# Reinstall only the native hooks/config +./scripts/install-codex-hooks.sh ``` ## Troubleshooting @@ -82,3 +106,10 @@ If scripts are not found from installed skills: ```bash ls -la "${CODEX_HOME:-$HOME/.codex}/skills/humanize/scripts" ``` + +If native exit gating does not trigger: + +```bash +codex features enable codex_hooks +sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" +``` diff --git a/docs/usage.md b/docs/usage.md index e12d45b9..b5625bec 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -277,6 +277,7 @@ Current built-in keys: | `codex_model` | `gpt-5.4` | Shared default model for Codex-backed review and analysis | | `codex_effort` | `high` | Shared default reasoning effort (`xhigh`, `high`, `medium`, `low`) | | `bitlesson_model` | `haiku` | Model used by the BitLesson selector agent | +| `provider_mode` | unset | Optional runtime mode hint such as `codex-only` | | `agent_teams` | 
`false` | Project-level default for agent teams workflow | | `alternative_plan_language` | `""` | Optional translated plan variant language; supported values include `Chinese`, `Korean`, `Japanese`, `Spanish`, `French`, `German`, `Portuguese`, `Russian`, `Arabic`, or ISO codes like `zh` | | `gen_plan_mode` | `discussion` | Default plan-generation mode | @@ -300,6 +301,10 @@ To override, add to `.humanize/config.json`: } ``` +On Codex installs, Humanize also seeds `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` +with a Codex/OpenAI `bitlesson_model` and `provider_mode: "codex-only"` when those keys +are unset, so BitLesson selection stays on the Codex/OpenAI path without probing Claude. + Codex model is resolved with this precedence: 1. CLI `--codex-model` flag (highest priority) 2. Feature-specific defaults (e.g., PR loop defaults to `medium` effort) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 4d397a19..ae703d30 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -980,6 +980,9 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above +# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. +CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + # Build command arguments for summary review (codex exec) CODEX_EXEC_ARGS=("-m" "$CODEX_EXEC_MODEL") if [[ -n "$CODEX_EXEC_EFFORT" ]]; then @@ -1056,14 +1059,14 @@ Provider: codex echo "# Review base ($review_base_type): $review_base" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex review --base $review_base ${CODEX_REVIEW_ARGS[*]}" + echo "codex ${CODEX_DISABLE_HOOKS_ARGS[*]} review --base $review_base ${CODEX_REVIEW_ARGS[*]}" } > "$CODEX_REVIEW_CMD_FILE" echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2 echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." 
>&2 CODEX_REVIEW_EXIT_CODE=0 - (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex review --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ + (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" review --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$? echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2 @@ -1387,7 +1390,7 @@ CODEX_PROMPT_CONTENT=$(cat "$REVIEW_PROMPT_FILE") echo "# Working directory: $PROJECT_ROOT" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex exec ${CODEX_EXEC_ARGS[*]} \"\"" + echo "codex ${CODEX_DISABLE_HOOKS_ARGS[*]} exec ${CODEX_EXEC_ARGS[*]} \"\"" echo "" echo "# Prompt content:" echo "$CODEX_PROMPT_CONTENT" @@ -1397,7 +1400,7 @@ echo "Codex command saved to: $CODEX_CMD_FILE" >&2 echo "Running summary review with timeout ${CODEX_TIMEOUT}s..." >&2 CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_EXEC_ARGS[@]}" - \ > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$? echo "Codex exit code: $CODEX_EXIT_CODE" >&2 diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh index f02710e2..8dedd8c0 100755 --- a/hooks/pr-loop-stop-hook.sh +++ b/hooks/pr-loop-stop-hook.sh @@ -1334,12 +1334,15 @@ if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_ CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" fi +# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. 
+CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") CODEX_PROMPT_CONTENT=$(cat "$CODEX_PROMPT_FILE") CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex exec "${CODEX_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_ARGS[@]}" - \ > "$CHECK_FILE" 2>/dev/null || CODEX_EXIT_CODE=$? if [[ $CODEX_EXIT_CODE -ne 0 ]]; then diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index 9399b06c..4d2b668d 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -15,6 +15,10 @@ PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null MERGED_CONFIG="$(load_merged_config "$PLUGIN_ROOT" "$PROJECT_ROOT")" BITLESSON_MODEL="$(get_config_value "$MERGED_CONFIG" "bitlesson_model")" BITLESSON_MODEL="${BITLESSON_MODEL:-haiku}" +CODEX_FALLBACK_MODEL="$(get_config_value "$MERGED_CONFIG" "codex_model")" +CODEX_FALLBACK_MODEL="${CODEX_FALLBACK_MODEL:-$DEFAULT_CODEX_MODEL}" +PROVIDER_MODE="$(get_config_value "$MERGED_CONFIG" "provider_mode")" +PROVIDER_MODE="${PROVIDER_MODE:-auto}" # Source portable timeout wrapper source "$SCRIPT_DIR/portable-timeout.sh" @@ -82,12 +86,34 @@ if [[ -z "$BITLESSON_FILE" ]]; then exit 1 fi +if [[ ! -f "$BITLESSON_FILE" ]]; then + echo "Error: BitLesson file not found: $BITLESSON_FILE" >&2 + exit 1 +fi + +BITLESSON_CONTENT="$(cat "$BITLESSON_FILE")" +if [[ -z "$(printf '%s' "$BITLESSON_CONTENT" | tr -d ' \t\n\r')" ]]; then + echo "Error: BitLesson file is empty (whitespace only): $BITLESSON_FILE" >&2 + exit 1 +fi + +if ! 
printf '%s\n' "$BITLESSON_CONTENT" | grep -Eq '^[[:space:]]*##[[:space:]]+Lesson:'; then + printf 'LESSON_IDS: NONE\n' + printf 'RATIONALE: The BitLesson file has no recorded lessons yet.\n' + exit 0 +fi + # ======================================== # Determine Provider from BITLESSON_MODEL # ======================================== BITLESSON_PROVIDER="$(detect_provider "$BITLESSON_MODEL")" +if [[ "$PROVIDER_MODE" == "codex-only" ]] && [[ "$BITLESSON_PROVIDER" == "claude" ]]; then + BITLESSON_MODEL="$CODEX_FALLBACK_MODEL" + BITLESSON_PROVIDER="codex" +fi + # ======================================== # Conditional Dependency Check (with fallback) # ======================================== @@ -99,17 +125,6 @@ if ! check_provider_dependency "$BITLESSON_PROVIDER" 2>/dev/null; then check_provider_dependency "$BITLESSON_PROVIDER" fi -if [[ ! -f "$BITLESSON_FILE" ]]; then - echo "Error: BitLesson file not found: $BITLESSON_FILE" >&2 - exit 1 -fi - -BITLESSON_CONTENT="$(cat "$BITLESSON_FILE")" -if [[ -z "$(printf '%s' "$BITLESSON_CONTENT" | tr -d ' \t\n\r')" ]]; then - echo "Error: BitLesson file is empty (whitespace only): $BITLESSON_FILE" >&2 - exit 1 -fi - # ======================================== # Detect Project Root (for -C) # ======================================== @@ -148,6 +163,7 @@ $BITLESSON_CONTENT 1. Match only lessons that are directly relevant to the sub-task scope and failure mode. 2. Prefer precision over recall: do not include weakly related lessons. 3. If nothing is relevant, return \`NONE\`. +4. Use only the information in this prompt. Do not use tools, shell commands, browser access, MCP servers, or repository inspection. 
## Output Format (Stable) @@ -164,21 +180,35 @@ EOF SELECTOR_TIMEOUT=120 -CODEX_EXIT_CODE=0 -if [[ "$BITLESSON_PROVIDER" == "codex" ]]; then - CODEX_EXEC_ARGS=("-m" "$BITLESSON_MODEL" "-c" "model_reasoning_effort=high") +run_selector() { + local provider="$1" + local model="$2" + + if [[ "$provider" == "codex" ]]; then + local codex_exec_args=( + "--disable" "codex_hooks" + "--skip-git-repo-check" + "--ephemeral" + "-s" "read-only" + "-m" "$model" + "-c" "model_reasoning_effort=low" + "-C" "$CODEX_PROJECT_ROOT" + ) + printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" codex exec "${codex_exec_args[@]}" - + return $? + fi - # Determine automation flag based on environment variable (same as ask-codex.sh) - CODEX_AUTO_FLAG="--full-auto" - if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "1" ]]; then - CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" + if [[ "$provider" == "claude" ]]; then + printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" claude --print --model "$model" - + return $? fi - CODEX_EXEC_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$CODEX_PROJECT_ROOT") - RAW_OUTPUT="$(printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" -)" || CODEX_EXIT_CODE=$? -elif [[ "$BITLESSON_PROVIDER" == "claude" ]]; then - RAW_OUTPUT="$(printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" claude --print --model "$BITLESSON_MODEL" -)" || CODEX_EXIT_CODE=$? -fi + echo "Error: Unsupported BitLesson provider '$provider'" >&2 + return 1 +} + +CODEX_EXIT_CODE=0 +RAW_OUTPUT="$(run_selector "$BITLESSON_PROVIDER" "$BITLESSON_MODEL" 2>&1)" || CODEX_EXIT_CODE=$? 
if [[ $CODEX_EXIT_CODE -eq 124 ]]; then echo "Error: BitLesson selector timed out after ${SELECTOR_TIMEOUT} seconds" >&2 @@ -187,6 +217,7 @@ fi if [[ $CODEX_EXIT_CODE -ne 0 ]]; then echo "Error: BitLesson selector failed (exit code $CODEX_EXIT_CODE)" >&2 + printf '%s\n' "$RAW_OUTPUT" >&2 exit "$CODEX_EXIT_CODE" fi diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh new file mode 100755 index 00000000..362b822f --- /dev/null +++ b/scripts/install-codex-hooks.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# +# Install/update Humanize native Codex hooks in CODEX_HOME/hooks.json. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +CODEX_CONFIG_DIR="${CODEX_HOME:-${HOME}/.codex}" +RUNTIME_ROOT="$CODEX_CONFIG_DIR/skills/humanize" +DRY_RUN="false" +ENABLE_FEATURE="true" +HOOKS_TEMPLATE="$REPO_ROOT/config/codex-hooks.json" + +usage() { + cat <<'EOF' +Install/update Humanize native Codex hooks. + +Usage: + scripts/install-codex-hooks.sh [options] + +Options: + --codex-config-dir PATH Codex config dir (default: ${CODEX_HOME:-~/.codex}) + --runtime-root PATH Installed Humanize runtime root (default: /skills/humanize) + --skip-enable-feature Do not run `codex features enable codex_hooks` + --dry-run Print actions without writing + -h, --help Show help +EOF +} + +log() { + printf '[install-codex-hooks] %s\n' "$*" +} + +die() { + printf '[install-codex-hooks] Error: %s\n' "$*" >&2 + exit 1 +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --codex-config-dir) + [[ -n "${2:-}" ]] || die "--codex-config-dir requires a value" + CODEX_CONFIG_DIR="$2" + shift 2 + ;; + --runtime-root) + [[ -n "${2:-}" ]] || die "--runtime-root requires a value" + RUNTIME_ROOT="$2" + shift 2 + ;; + --skip-enable-feature) + ENABLE_FEATURE="false" + shift + ;; + --dry-run) + DRY_RUN="true" + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + die "unknown option: $1" + ;; + esac +done + +[[ -f "$HOOKS_TEMPLATE" 
]] || die "hook template not found: $HOOKS_TEMPLATE" + +HOOKS_FILE="$CODEX_CONFIG_DIR/hooks.json" + +require_codex_hooks_support() { + if ! command -v codex >/dev/null 2>&1; then + die "Codex CLI with native hooks support is required. Install Codex 0.114.0+ first." + fi + + if ! codex features list 2>/dev/null | grep -qE '^codex_hooks[[:space:]]'; then + die "Installed Codex CLI does not expose the codex_hooks feature. Humanize Codex install requires Codex 0.114.0+." + fi +} + +merge_hooks_json() { + local hooks_file="$1" + local template_file="$2" + local runtime_root="$3" + + if ! command -v python3 >/dev/null 2>&1; then + die "python3 is required to merge Codex hooks" + fi + + python3 - "$hooks_file" "$template_file" "$runtime_root" <<'PY' +import json +import pathlib +import re +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +template_file = pathlib.Path(sys.argv[2]) +runtime_root = sys.argv[3] + +template_text = template_file.read_text(encoding="utf-8") +template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", runtime_root) +template = json.loads(template_text) + +existing = {} +if hooks_file.exists(): + with hooks_file.open("r", encoding="utf-8") as fh: + existing = json.load(fh) + +if not isinstance(existing, dict): + raise SystemExit(f"existing hooks config must be a JSON object: {hooks_file}") + +hooks = existing.setdefault("hooks", {}) +if not isinstance(hooks, dict): + raise SystemExit(f"existing hooks config has invalid 'hooks' object: {hooks_file}") + +stop_groups = hooks.get("Stop", []) +if stop_groups is None: + stop_groups = [] +if not isinstance(stop_groups, list): + raise SystemExit(f"existing hooks config has invalid Stop array: {hooks_file}") + +managed_pattern = re.compile(r"(^|/)humanize/hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)$") + +filtered_groups = [] +for group in stop_groups: + if not isinstance(group, dict): + filtered_groups.append(group) + continue + group_hooks = group.get("hooks") + if not 
isinstance(group_hooks, list): + filtered_groups.append(group) + continue + kept_hooks = [] + for hook in group_hooks: + if not isinstance(hook, dict): + kept_hooks.append(hook) + continue + command = hook.get("command") + if isinstance(command, str) and managed_pattern.search(command): + continue + kept_hooks.append(hook) + if kept_hooks: + new_group = dict(group) + new_group["hooks"] = kept_hooks + filtered_groups.append(new_group) + +managed_stop_groups = template.get("hooks", {}).get("Stop", []) +filtered_groups.extend(managed_stop_groups) +hooks["Stop"] = filtered_groups + +if not existing.get("description"): + existing["description"] = template.get("description", "Humanize Codex Hooks") + +hooks_file.parent.mkdir(parents=True, exist_ok=True) +hooks_file.write_text(json.dumps(existing, indent=2) + "\n", encoding="utf-8") +PY +} + +enable_feature() { + local config_dir="$1" + + [[ "$ENABLE_FEATURE" == "true" ]] || return 0 + + if CODEX_HOME="$config_dir" codex features enable codex_hooks >/dev/null 2>&1; then + log "enabled codex_hooks feature in $config_dir/config.toml" + else + die "failed to enable codex_hooks feature automatically in $config_dir/config.toml" + fi +} + +log "codex config dir: $CODEX_CONFIG_DIR" +log "runtime root: $RUNTIME_ROOT" +log "hooks file: $HOOKS_FILE" + +require_codex_hooks_support + +if [[ "$DRY_RUN" == "true" ]]; then + log "DRY-RUN merge $HOOKS_TEMPLATE -> $HOOKS_FILE" + if [[ "$ENABLE_FEATURE" == "true" ]]; then + log "DRY-RUN enable codex_hooks feature in $CODEX_CONFIG_DIR/config.toml" + fi + exit 0 +fi + +merge_hooks_json "$HOOKS_FILE" "$HOOKS_TEMPLATE" "$RUNTIME_ROOT" +enable_feature "$CODEX_CONFIG_DIR" + +cat </skills//SKILL.md + # /scripts + if [[ -d "$candidate_root/skills" ]] && [[ -d "$candidate_root/scripts" ]]; then + SKILLS_SOURCE_ROOT="$candidate_root/skills" + RUNTIME_SOURCE_ROOT="$candidate_root" + return 0 + fi + + # Installed runtime layout: + # /humanize/scripts/install-skill.sh + # /humanize-gen-plan/SKILL.md + 
# /humanize-rlcr/SKILL.md + if [[ -d "$runtime_root/scripts" ]] && [[ -d "$runtime_root/hooks" ]] && [[ -d "$runtime_root/prompt-template" ]]; then + skills_root="$(cd "$runtime_root/.." && pwd)" + if [[ -f "$skills_root/humanize/SKILL.md" ]] && [[ -f "$skills_root/humanize-gen-plan/SKILL.md" ]] && [[ -f "$skills_root/humanize-refine-plan/SKILL.md" ]] && [[ -f "$skills_root/humanize-rlcr/SKILL.md" ]]; then + SKILLS_SOURCE_ROOT="$skills_root" + RUNTIME_SOURCE_ROOT="$runtime_root" + return 0 + fi + fi + + die "could not resolve Humanize source layout from: $candidate_root" +} + sync_dir() { local src="$1" local dst="$2" @@ -107,7 +147,7 @@ sync_dir() { sync_one_skill() { local skill="$1" local target_dir="$2" - local src="$REPO_ROOT/skills/$skill" + local src="$SKILLS_SOURCE_ROOT/$skill" local dst="$target_dir/$skill" sync_dir "$src" "$dst" } @@ -120,7 +160,7 @@ install_runtime_bundle() { log "syncing runtime bundle into: $runtime_root" for component in scripts hooks prompt-template templates config agents; do - sync_dir "$REPO_ROOT/$component" "$runtime_root/$component" + sync_dir "$RUNTIME_SOURCE_ROOT/$component" "$runtime_root/$component" done } @@ -192,6 +232,7 @@ strip_claude_specific_frontmatter() { sync_target() { local label="$1" local target_dir="$2" + local selected_skills=("${SKILL_NAMES[@]}") log "target: $label" log "skills dir: $target_dir" @@ -200,7 +241,7 @@ sync_target() { mkdir -p "$target_dir" fi - for skill in "${SKILL_NAMES[@]}"; do + for skill in "${selected_skills[@]}"; do log "syncing [$label] skill: $skill" sync_one_skill "$skill" "$target_dir" done @@ -209,6 +250,140 @@ sync_target() { strip_claude_specific_frontmatter "$target_dir" } +install_codex_native_hooks() { + local target_dir="$1" + local runtime_root="$target_dir/humanize" + local hooks_installer="$REPO_ROOT/scripts/install-codex-hooks.sh" + local args=( + --codex-config-dir "$CODEX_CONFIG_DIR" + --runtime-root "$runtime_root" + ) + + [[ -x "$hooks_installer" ]] || die "missing 
Codex hooks installer: $hooks_installer" + [[ "$DRY_RUN" == "true" ]] && args+=(--dry-run) + + log "installing native Codex hooks into: $CODEX_CONFIG_DIR" + "$hooks_installer" "${args[@]}" +} + +install_codex_user_config() { + local runtime_root="$1" + local install_target="$2" + local user_config_dir="${HUMANIZE_USER_CONFIG_DIR}" + local user_config_file="$user_config_dir/config.json" + local default_config_file="$runtime_root/config/default_config.json" + + [[ -f "$default_config_file" ]] || die "missing default config: $default_config_file" + + if ! command -v python3 >/dev/null 2>&1; then + die "python3 is required to update Humanize user config for Codex installs" + fi + + if [[ "$DRY_RUN" == "true" ]]; then + log "DRY-RUN seed Codex-friendly BitLesson config in $user_config_file" + return + fi + + mkdir -p "$user_config_dir" + + python3 - "$default_config_file" "$user_config_file" "$install_target" <<'PY' +import json +import pathlib +import sys + +default_config = pathlib.Path(sys.argv[1]) +user_config = pathlib.Path(sys.argv[2]) +install_target = sys.argv[3] + +defaults = json.loads(default_config.read_text(encoding="utf-8")) +default_codex_model = defaults.get("codex_model") or "gpt-5.4" + +if user_config.exists(): + try: + data = json.loads(user_config.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + print(f"malformed existing user config: {user_config}: {exc}", file=sys.stderr) + sys.exit(2) + if not isinstance(data, dict): + print(f"existing user config is not a JSON object: {user_config}", file=sys.stderr) + sys.exit(2) +else: + data = {} + +if not data.get("bitlesson_model"): + data["bitlesson_model"] = data.get("codex_model") or default_codex_model + +if install_target == "codex" and not data.get("provider_mode"): + data["provider_mode"] = "codex-only" + +user_config.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8") +PY + case "$?" 
in + 0) + log "ensured BitLesson uses a Codex/OpenAI model in $user_config_file" + ;; + 2) + die "failed to update $user_config_file because it is malformed; fix it manually and rerun install" + ;; + *) + die "failed to update Humanize user config at $user_config_file" + ;; + esac +} + +install_bitlesson_selector_shim() { + local primary_runtime_root="$1" + local secondary_runtime_root="${2:-}" + local shim_path="$COMMAND_BIN_DIR/bitlesson-selector" + + if [[ "$DRY_RUN" == "true" ]]; then + log "DRY-RUN install bitlesson-selector shim into $shim_path" + return + fi + + mkdir -p "$COMMAND_BIN_DIR" + + cat > "$shim_path" <> "$shim_path" <> "$shim_path" <<'EOF' +) + +for candidate in "${candidate_paths[@]}"; do + if [[ -x "$candidate" ]]; then + exec "$candidate" "$@" + fi +done + +echo "Error: Humanize bitlesson selector runtime not found. Re-run install-skill.sh." >&2 +exit 1 +EOF + + chmod +x "$shim_path" + log "installed bitlesson-selector shim into: $shim_path" +} + +install_kimi_target() { + sync_target "kimi" "$KIMI_SKILLS_DIR" +} + +install_codex_target() { + sync_target "codex" "$CODEX_SKILLS_DIR" + install_codex_user_config "$CODEX_SKILLS_DIR/humanize" "$TARGET" + install_codex_native_hooks "$CODEX_SKILLS_DIR" +} + while [[ $# -gt 0 ]]; do case "$1" in --target) @@ -239,6 +414,16 @@ while [[ $# -gt 0 ]]; do CODEX_SKILLS_DIR="$2" shift 2 ;; + --codex-config-dir) + [[ -n "${2:-}" ]] || die "--codex-config-dir requires a value" + CODEX_CONFIG_DIR="$2" + shift 2 + ;; + --command-bin-dir) + [[ -n "${2:-}" ]] || die "--command-bin-dir requires a value" + COMMAND_BIN_DIR="$2" + shift 2 + ;; --dry-run) DRY_RUN="true" shift @@ -253,6 +438,7 @@ while [[ $# -gt 0 ]]; do esac done +resolve_source_layout "$REPO_ROOT" validate_repo if [[ -n "$LEGACY_SKILLS_DIR" ]]; then @@ -273,18 +459,23 @@ if [[ "$TARGET" == "kimi" || "$TARGET" == "both" ]]; then fi if [[ "$TARGET" == "codex" || "$TARGET" == "both" ]]; then log "codex skills dir: $CODEX_SKILLS_DIR" + log "codex config 
dir: $CODEX_CONFIG_DIR" fi +log "command bin dir: $COMMAND_BIN_DIR" case "$TARGET" in kimi) - sync_target "kimi" "$KIMI_SKILLS_DIR" + install_kimi_target + install_bitlesson_selector_shim "$KIMI_SKILLS_DIR/humanize" ;; codex) - sync_target "codex" "$CODEX_SKILLS_DIR" + install_codex_target + install_bitlesson_selector_shim "$CODEX_SKILLS_DIR/humanize" "$KIMI_SKILLS_DIR/humanize" ;; both) - sync_target "kimi" "$KIMI_SKILLS_DIR" - sync_target "codex" "$CODEX_SKILLS_DIR" + install_kimi_target + install_codex_target + install_bitlesson_selector_shim "$CODEX_SKILLS_DIR/humanize" "$KIMI_SKILLS_DIR/humanize" ;; esac @@ -304,6 +495,7 @@ fi if [[ "$TARGET" == "codex" || "$TARGET" == "both" ]]; then cat </humanize +Codex installs also update native hook/config state in: + $CODEX_CONFIG_DIR + No shell profile changes were made. +If $COMMAND_BIN_DIR is on PATH, the bitlesson-selector shim is now available there. EOF diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 306f875c..c707941c 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -24,6 +24,8 @@ HOOK_SCRIPT="$HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh" SESSION_ID="${CLAUDE_SESSION_ID:-}" TRANSCRIPT_PATH="${CLAUDE_TRANSCRIPT_PATH:-}" PRINT_JSON="false" +HOOK_MODEL="${CODEX_MODEL:-humanize-skill-gate}" +HOOK_PERMISSION_MODE="${CODEX_PERMISSION_MODE:-default}" usage() { cat <<'EOF' @@ -88,10 +90,15 @@ HOOK_INPUT=$(jq -n \ --arg session_id "$SESSION_ID" \ --arg transcript_path "$TRANSCRIPT_PATH" \ --arg cwd "$PROJECT_ROOT" \ + --arg model "$HOOK_MODEL" \ + --arg permission_mode "$HOOK_PERMISSION_MODE" \ '{ hook_event_name: "Stop", stop_hook_active: false, cwd: $cwd, + model: $model, + permission_mode: $permission_mode, + last_assistant_message: null, session_id: ($session_id | select(length > 0)), transcript_path: ($transcript_path | select(length > 0)) }') diff --git a/skills/humanize-rlcr/SKILL.md b/skills/humanize-rlcr/SKILL.md index e65a05b6..d9873b47 100644 --- 
a/skills/humanize-rlcr/SKILL.md +++ b/skills/humanize-rlcr/SKILL.md @@ -1,21 +1,15 @@ --- name: humanize-rlcr -description: Start RLCR (Ralph-Loop with Codex Review) with hook-equivalent enforcement from skill mode by reusing the existing stop-hook logic. +description: Start RLCR (Ralph-Loop with Codex Review) on Codex using the native Stop hook. type: flow user-invocable: false disable-model-invocation: true --- -# Humanize RLCR Loop (Hook-Equivalent) +# Humanize RLCR Loop -Use this flow to run RLCR in environments without native hooks. -Do not re-implement review logic manually. Always call the RLCR stop gate wrapper: - -```bash -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" -``` - -The wrapper executes `hooks/loop-codex-stop-hook.sh`, so skill-mode behavior stays aligned with hook-mode behavior. +Use this flow as the Codex entrypoint for RLCR. +Codex installs of Humanize require native hooks support and install the Humanize `Stop` hooks automatically. ## Runtime Root @@ -49,24 +43,13 @@ For each round: 4. Write required summary file: - Normal phase: `.humanize/rlcr//round--summary.md` - Finalize phase: `.humanize/rlcr//finalize-summary.md` -5. Run gate command: - -```bash -GATE_CMD=("{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh") -[[ -n "${CLAUDE_SESSION_ID:-}" ]] && GATE_CMD+=(--session-id "$CLAUDE_SESSION_ID") -[[ -n "${CLAUDE_TRANSCRIPT_PATH:-}" ]] && GATE_CMD+=(--transcript-path "$CLAUDE_TRANSCRIPT_PATH") -"${GATE_CMD[@]}" -GATE_EXIT=$? -``` - -6. Handle gate result: - - `0`: loop is allowed to exit (done). - - `10`: blocked by RLCR logic. Follow returned instructions exactly, continue next round. - - `20`: infrastructure error (wrapper/hook/runtime). Report error, do not fake completion. +5. Stop or exit normally. +6. Let the native Humanize `Stop` hook run automatically. +7. If the hook blocks exit, follow the returned instructions exactly and continue the next round. 
## What This Enforces -By routing through the stop-hook logic, this skill enforces: +The native Stop-hook path enforces: - state/schema validation (`current_round`, `max_iterations`, `review_started`, `base_branch`, etc.) - branch consistency checks @@ -86,8 +69,8 @@ By routing through the stop-hook logic, this skill enforces: ## Critical Rules 1. Never manually edit `state.md` or `finalize-state.md`. -2. Never skip a blocked gate result by declaring completion manually. -3. Never run ad-hoc `codex exec` / `codex review` in place of the gate for phase transitions. +2. Never skip a blocked hook result by declaring completion manually. +3. Never run ad-hoc `codex exec` / `codex review` in place of the hook-managed phase transitions. 4. Always use files generated by the loop (`round-*-prompt.md`, `round-*-review-result.md`) as source of truth. ## Options @@ -121,9 +104,6 @@ Review phase `codex review` runs with `gpt-5.4:high`. # Review-only mode /flow:humanize-rlcr --skip-impl - -# Load skill without auto-execution -/skill:humanize-rlcr ``` ## Cancel diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index 1b916306..b9a6ccd5 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -45,7 +45,7 @@ The RLCR (Ralph-Loop with Codex Review) loop has two phases: - Issues marked with `[P0-9]` severity markers - If issues found → AI fixes them and continues - If no issues → loop completes with Finalize Phase -- In skill mode, always run `{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh` to enforce hook-equivalent transitions and blocking +- On Codex CLI `0.114.0+` with `codex_hooks` enabled, Humanize installs a native `Stop` hook so exit gating runs automatically ### 2. 
PR Loop - Automated PR Review Handling @@ -80,10 +80,7 @@ Transforms a rough draft document into a structured implementation plan with: "{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-rlcr-loop.sh" --skip-impl ``` -```bash -# For each round, run the RLCR gate (required) -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" -``` +After each round, write the required summary and stop/exit normally. Humanize's native Codex `Stop` hook handles review gating automatically. **Common Options:** - `--max N` - Maximum iterations before auto-stop (default: 42) @@ -207,7 +204,7 @@ The RLCR loop uses a Goal Tracker to prevent goal drift: 2. **Maintain Goal Tracker**: Keep goal-tracker.md up-to-date with progress 3. **Be thorough**: Include details about implementation, files changed, tests added 4. **No cheating**: Don't try to exit by editing state files or running cancel commands -5. **Run stop gate each round**: Use `scripts/rlcr-stop-gate.sh` instead of manual phase control +5. **Use the native Stop hook on Codex**: After writing the required summary, stop/exit normally so Codex runs the Humanize Stop hook 6. 
**Trust the process**: External review helps improve implementation quality ## Prerequisites diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index cd3fb58a..8c1a4d67 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -84,6 +84,7 @@ TEST_SUITES=( "test-task-tag-routing.sh" "test-config-merge.sh" "test-config-error-handling.sh" + "test-codex-hook-install.sh" "test-unified-codex-config.sh" "test-pr-loop-1-scripts.sh" "test-pr-loop-2-hooks.sh" diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 27285561..9f5b3663 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -532,11 +532,18 @@ setup_mock_codex_impl_feedback() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF #!/bin/bash -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $feedback REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then echo "No issues found." 
fi MOCK_EOF @@ -550,9 +557,16 @@ setup_mock_codex_review_issues() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF #!/bin/bash -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then echo "Should not be called in review phase" -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then cat << 'REVIEWOUT' $review_output REVIEWOUT diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index d3c205c3..113c92a5 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -26,6 +26,25 @@ create_mock_bitlesson() { EOF } +create_real_bitlesson() { + local dir="$1" + mkdir -p "$dir" + cat > "$dir/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +## Lesson: Avoid tracker drift +Lesson ID: BL-20260315-tracker-drift +Scope: goal-tracker.md +Problem Description: Tracker diverges from actual task status. +Root Cause: Status rows are not updated after verification. +Solution: Update tracker rows immediately after each verification step. +Constraints: Keep tracker edits minimal. +Validation Evidence: Verified in test fixture. 
+Source Rounds: 0 +EOF +} + # Helper: create a mock codex binary that outputs valid bitlesson-selector format create_mock_codex() { local bin_dir="$1" @@ -102,7 +121,7 @@ echo "--- Test 1: gpt-* model routes to codex ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_codex "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -166,7 +185,7 @@ echo "--- Test 2: haiku model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -195,7 +214,7 @@ echo "--- Test 3: sonnet model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -224,7 +243,7 @@ echo "--- Test 4: OPUS (uppercase) model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -253,7 +272,7 @@ echo "--- Test 5: Unknown model exits non-zero with error ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "unknown-xyz-model"}' > "$TEST_DIR/.humanize/config.json" @@ -279,7 +298,7 @@ echo "--- Test 6: gpt-* model with missing codex binary exits non-zero ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "gpt-4o"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub claude but NOT codex. 
@@ -315,7 +334,7 @@ echo "--- Test 7: haiku model falls back to codex when claude binary is missing echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "haiku"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub codex but NOT claude. @@ -348,4 +367,102 @@ fi # Summary # ======================================== +echo "" +echo "--- Test 8: codex-only provider mode forces codex routing ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "haiku", "codex_model": "gpt-5.4", "provider_mode": "codex-only"}' > "$TEST_DIR/.humanize/config.json" +FALLBACK_BIN="$TEST_DIR/fallback-bin" +create_mock_codex "$FALLBACK_BIN" + +exit_code=0 +stdout_out="" +stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$FALLBACK_BIN:$PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Initialize tracker" \ + --paths "plans/plan.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + +if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "mock codex"; then + pass "codex-only provider mode forces codex routing" +else + fail "codex-only provider mode forces codex routing" "exit=0 + mock codex rationale" "exit=$exit_code, stdout=$stdout_out" +fi + +echo "" +echo "--- Test 9: Placeholder BitLesson file short-circuits to NONE ---" +echo "" + +setup_test_dir +create_mock_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "gpt-5.4"}' > "$TEST_DIR/.humanize/config.json" + +exit_code=0 +stdout_out="" +stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Any task" \ + --paths "README.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? 
+ +if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "LESSON_IDS: NONE" && echo "$stdout_out" | grep -q "no recorded lessons"; then + pass "Placeholder BitLesson file returns NONE without invoking a model" +else + fail "Placeholder BitLesson file returns NONE without invoking a model" "exit=0 + NONE rationale" "exit=$exit_code, stdout=$stdout_out" +fi + +echo "" +echo "--- Test 10: Codex selector disables hooks and avoids full-auto ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "gpt-5.4"}' > "$TEST_DIR/.humanize/config.json" +CAPTURE_BIN="$TEST_DIR/capture-bin" +mkdir -p "$CAPTURE_BIN" +cat > "$CAPTURE_BIN/codex" <<'EOF' +#!/bin/bash +printf '%s\n' "$@" > "${TEST_CAPTURE_ARGS:?}" +cat > /dev/null +cat <<'OUT' +LESSON_IDS: BL-20260315-tracker-drift +RATIONALE: The tracker lesson directly matches the task. +OUT +EOF +chmod +x "$CAPTURE_BIN/codex" + +CAPTURE_ARGS="$TEST_DIR/codex-args.txt" +exit_code=0 +stdout_out="" +stdout_out=$(TEST_CAPTURE_ARGS="$CAPTURE_ARGS" CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$CAPTURE_BIN:$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Update the goal tracker after verification" \ + --paths "goal-tracker.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + +captured_args="$(cat "$CAPTURE_ARGS")" + +if [[ $exit_code -eq 0 ]] \ + && echo "$stdout_out" | grep -q "BL-20260315-tracker-drift" \ + && echo "$captured_args" | grep -q -- '--disable' \ + && echo "$captured_args" | grep -q -- 'codex_hooks' \ + && echo "$captured_args" | grep -q -- '--skip-git-repo-check' \ + && echo "$captured_args" | grep -q -- '--ephemeral' \ + && echo "$captured_args" | grep -q -- 'read-only' \ + && ! 
echo "$captured_args" | grep -q -- '--full-auto'; then + pass "Codex selector runs as a direct helper without hooks or full-auto" +else + fail "Codex selector runs as a direct helper without hooks or full-auto" \ + "exit=0 + direct-helper args" \ + "exit=$exit_code, stdout=$stdout_out, args=$captured_args" +fi + print_test_summary "Bitlesson Select Routing Test Summary" diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh new file mode 100755 index 00000000..55fc71d3 --- /dev/null +++ b/tests/test-codex-hook-install.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# +# Tests for Codex-native hook installation and merge behavior. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +INSTALL_SCRIPT="$PROJECT_ROOT/scripts/install-skill.sh" + +echo "==========================================" +echo "Codex Hook Install Tests" +echo "==========================================" +echo "" + +if [[ ! -x "$INSTALL_SCRIPT" ]]; then + echo "FATAL: install-skill.sh not found at $INSTALL_SCRIPT" >&2 + exit 1 +fi + +if ! 
command -v python3 >/dev/null 2>&1; then + echo "FATAL: python3 is required for this test" >&2 + exit 1 +fi + +setup_test_dir + +FAKE_BIN="$TEST_DIR/bin" +CODEX_HOME_DIR="$TEST_DIR/codex-home" +HOOKS_FILE="$CODEX_HOME_DIR/hooks.json" +FEATURE_LOG="$TEST_DIR/codex-features.log" +XDG_CONFIG_HOME_DIR="$TEST_DIR/xdg-config" +HUMANIZE_USER_CONFIG="$XDG_CONFIG_HOME_DIR/humanize/config.json" +COMMAND_BIN_DIR="$TEST_DIR/command-bin" +mkdir -p "$FAKE_BIN" "$CODEX_HOME_DIR" "$COMMAND_BIN_DIR" + +cat > "$FAKE_BIN/codex" <<'EOF' +#!/bin/bash +set -euo pipefail + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +codex_hooks under development false +LIST + exit 0 +fi + +if [[ "${1:-}" == "features" && "${2:-}" == "enable" && "${3:-}" == "codex_hooks" ]]; then + printf 'CODEX_HOME=%s\n' "${CODEX_HOME:-}" >> "${TEST_CODEX_FEATURE_LOG:?}" + mkdir -p "${CODEX_HOME:?}" + : > "${CODEX_HOME}/.codex-hooks-enabled" + exit 0 +fi + +if [[ "${1:-}" == "exec" ]]; then + cat <<'OUT' +LESSON_IDS: NONE +RATIONALE: No matching lessons found (fake codex exec). 
+OUT + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$FAKE_BIN/codex" + +cat > "$HOOKS_FILE" <<'EOF' +{ + "description": "Existing hooks", + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "/custom/session-start.sh", + "timeout": 15 + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "/tmp/old/skills/humanize/hooks/loop-codex-stop-hook.sh", + "timeout": 30 + }, + { + "type": "command", + "command": "/tmp/old/skills/humanize/hooks/pr-loop-stop-hook.sh", + "timeout": 30 + } + ] + }, + { + "hooks": [ + { + "type": "command", + "command": "/custom/keep-me.sh", + "timeout": 5 + } + ] + } + ] + } +} +EOF + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$CODEX_HOME_DIR" \ + --codex-skills-dir "$CODEX_HOME_DIR/skills" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install.log" 2>&1 + +if [[ -f "$CODEX_HOME_DIR/skills/humanize/SKILL.md" ]]; then + pass "Codex install syncs Humanize skill bundle" +else + fail "Codex install syncs Humanize skill bundle" "skills/humanize/SKILL.md exists" "missing" +fi + +if [[ -f "$CODEX_HOME_DIR/skills/humanize-rlcr/SKILL.md" ]]; then + pass "Codex install keeps humanize-rlcr entrypoint skill" +else + fail "Codex install keeps humanize-rlcr entrypoint skill" "skills/humanize-rlcr/SKILL.md exists" "missing" +fi + +if [[ -f "$HOOKS_FILE" ]]; then + pass "Codex install writes hooks.json" +else + fail "Codex install writes hooks.json" "$HOOKS_FILE exists" "missing" +fi + +if [[ -f "$CODEX_HOME_DIR/.codex-hooks-enabled" ]]; then + pass "Codex install enables codex_hooks feature" +else + fail "Codex install enables codex_hooks feature" ".codex-hooks-enabled marker exists" "missing" +fi + +if [[ -f "$HUMANIZE_USER_CONFIG" ]]; then + pass "Codex install writes Humanize user config" +else + fail "Codex install 
writes Humanize user config" "$HUMANIZE_USER_CONFIG exists" "missing" +fi + +if [[ -x "$COMMAND_BIN_DIR/bitlesson-selector" ]]; then + pass "Codex install writes a PATH-ready bitlesson-selector shim" +else + fail "Codex install writes a PATH-ready bitlesson-selector shim" "$COMMAND_BIN_DIR/bitlesson-selector exists" "missing" +fi + +if [[ "$(jq -r '.bitlesson_model // empty' "$HUMANIZE_USER_CONFIG")" == "gpt-5.4" ]]; then + pass "Codex install seeds bitlesson_model with a Codex/OpenAI model" +else + fail "Codex install seeds bitlesson_model with a Codex/OpenAI model" \ + "gpt-5.4" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +if [[ "$(jq -r '.provider_mode // empty' "$HUMANIZE_USER_CONFIG")" == "codex-only" ]]; then + pass "Codex install marks Humanize user config as codex-only" +else + fail "Codex install marks Humanize user config as codex-only" \ + "codex-only" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +runtime_root="$CODEX_HOME_DIR/skills/humanize" +PY_OUTPUT="$( + python3 - "$HOOKS_FILE" "$runtime_root" <<'PY' +import json +import pathlib +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +runtime_root = sys.argv[2] +data = json.loads(hooks_file.read_text(encoding="utf-8")) + +commands = [] +for group in data["hooks"]["Stop"]: + for hook in group.get("hooks", []): + command = hook.get("command") + if isinstance(command, str): + commands.append(command) + +expected = { + f"{runtime_root}/hooks/loop-codex-stop-hook.sh", + f"{runtime_root}/hooks/pr-loop-stop-hook.sh", +} + +print("FOUND=" + ("1" if expected.issubset(set(commands)) else "0")) +print("KEEP=" + ("1" if "/custom/keep-me.sh" in commands else "0")) +print("OLD=" + ("1" if any("/tmp/old/skills/humanize/hooks/" in cmd for cmd in commands) else "0")) +print("SESSION=" + ("1" if data["hooks"]["SessionStart"][0]["hooks"][0]["command"] == "/custom/session-start.sh" else "0")) +print("COUNT=" + str(sum(1 for cmd in commands if "/humanize/hooks/" in 
cmd))) +PY +)" + +if grep -q '^FOUND=1$' <<<"$PY_OUTPUT"; then + pass "Codex install adds managed Humanize Stop hook commands" +else + fail "Codex install adds managed Humanize Stop hook commands" "FOUND=1" "$PY_OUTPUT" +fi + +if grep -q '^KEEP=1$' <<<"$PY_OUTPUT"; then + pass "Codex install preserves unrelated Stop hooks" +else + fail "Codex install preserves unrelated Stop hooks" "KEEP=1" "$PY_OUTPUT" +fi + +if grep -q '^OLD=0$' <<<"$PY_OUTPUT"; then + pass "Codex install removes stale Humanize hook commands" +else + fail "Codex install removes stale Humanize hook commands" "OLD=0" "$PY_OUTPUT" +fi + +if grep -q '^SESSION=1$' <<<"$PY_OUTPUT"; then + pass "Codex install preserves SessionStart hooks" +else + fail "Codex install preserves SessionStart hooks" "SESSION=1" "$PY_OUTPUT" +fi + +if grep -q '^COUNT=2$' <<<"$PY_OUTPUT"; then + pass "Codex install writes exactly two managed Humanize Stop hooks" +else + fail "Codex install writes exactly two managed Humanize Stop hooks" "COUNT=2" "$PY_OUTPUT" +fi + +mkdir -p "$TEST_DIR/project" +cat > "$TEST_DIR/project/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +EOF + +shim_output="$( + CLAUDE_PROJECT_DIR="$TEST_DIR/project" \ + XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + PATH="$COMMAND_BIN_DIR:$FAKE_BIN:$PATH" \ + "$COMMAND_BIN_DIR/bitlesson-selector" \ + --task "Verify the shim dispatches into the installed runtime" \ + --paths "README.md" \ + --bitlesson-file "$TEST_DIR/project/bitlesson.md" +)" + +if grep -q '^LESSON_IDS: NONE$' <<<"$shim_output"; then + pass "bitlesson-selector shim dispatches into installed runtime" +else + fail "bitlesson-selector shim dispatches into installed runtime" "LESSON_IDS: NONE" "$shim_output" +fi + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$CODEX_HOME_DIR" \ + --codex-skills-dir "$CODEX_HOME_DIR/skills" \ + > "$TEST_DIR/install-2.log" 2>&1 + 
+PY_OUTPUT_2="$( + python3 - "$HOOKS_FILE" <<'PY' +import json +import pathlib +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +data = json.loads(hooks_file.read_text(encoding="utf-8")) + +commands = [] +for group in data["hooks"]["Stop"]: + for hook in group.get("hooks", []): + command = hook.get("command") + if isinstance(command, str): + commands.append(command) + +print(sum(1 for cmd in commands if "/humanize/hooks/" in cmd)) +PY +)" + +if [[ "$PY_OUTPUT_2" == "2" ]]; then + pass "Codex install is idempotent for managed hook commands" +else + fail "Codex install is idempotent for managed hook commands" "2" "$PY_OUTPUT_2" +fi + +if [[ "$(wc -l < "$FEATURE_LOG" | tr -d ' ')" == "2" ]]; then + pass "Codex feature enable runs on each Codex install/update" +else + fail "Codex feature enable runs on each Codex install/update" "2 log entries" "$(cat "$FEATURE_LOG")" +fi + +UNSUPPORTED_BIN="$TEST_DIR/bin-unsupported" +UNSUPPORTED_HOME="$TEST_DIR/codex-home-unsupported" +mkdir -p "$UNSUPPORTED_BIN" "$UNSUPPORTED_HOME" + +cat > "$UNSUPPORTED_BIN/codex" <<'EOF' +#!/bin/bash +set -euo pipefail + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +apply_patch_freeform under development false +LIST + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$UNSUPPORTED_BIN/codex" + +set +e +PATH="$UNSUPPORTED_BIN:$PATH" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$UNSUPPORTED_HOME" \ + --codex-skills-dir "$UNSUPPORTED_HOME/skills" \ + > "$TEST_DIR/install-unsupported.log" 2>&1 +UNSUPPORTED_EXIT=$? 
+set -e + +if [[ "$UNSUPPORTED_EXIT" -ne 0 ]]; then + pass "Codex install rejects builds without native hooks support" +else + fail "Codex install rejects builds without native hooks support" "non-zero exit" "exit 0" +fi + +if grep -q "codex_hooks feature" "$TEST_DIR/install-unsupported.log"; then + pass "Unsupported Codex failure explains missing codex_hooks feature" +else + fail "Unsupported Codex failure explains missing codex_hooks feature" \ + "error mentioning codex_hooks feature" \ + "$(cat "$TEST_DIR/install-unsupported.log")" +fi + +print_test_summary "Codex Hook Install Tests" diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh new file mode 100644 index 00000000..ae0f8bba --- /dev/null +++ b/tests/test-disable-nested-codex-hooks.sh @@ -0,0 +1,213 @@ +#!/bin/bash +# +# Ensure Humanize's nested Codex reviewer calls disable native hooks to avoid recursion. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { + echo -e "${GREEN}PASS${NC}: $1" + TESTS_PASSED=$((TESTS_PASSED + 1)) +} + +fail() { + echo -e "${RED}FAIL${NC}: $1" + echo " Expected: $2" + echo " Got: $3" + TESTS_FAILED=$((TESTS_FAILED + 1)) +} + +echo "==========================================" +echo "Disable Nested Codex Hooks Tests" +echo "==========================================" +echo "" + +TEST_DIR="$(mktemp -d)" +trap 'rm -rf "$TEST_DIR"' EXIT + +export XDG_CACHE_HOME="$TEST_DIR/.cache" +mkdir -p "$XDG_CACHE_HOME" + +STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" +PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" + +setup_repo() { + local repo_dir="$1" + + mkdir -p "$repo_dir" + cd "$repo_dir" + git init -q + git config user.email "test@test.com" + git config user.name "Test User" + git config commit.gpgsign false + + cat > .gitignore <<'EOF' +.humanize/ +plans/ +.cache/ +EOF + mkdir -p plans + cat > plans/test-plan.md <<'EOF' +# Test Plan +EOF + echo "init" > init.txt + git add .gitignore init.txt + git -c commit.gpgsign=false commit -q -m "initial" +} + +setup_mock_codex() { + local bin_dir="$1" + local args_file="$2" + + mkdir -p "$bin_dir" + cat > "$bin_dir/codex" < "$args_file" + +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done + +if [[ "\$subcommand" == "exec" ]]; then + echo "Review: keep iterating." + exit 0 +fi + +if [[ "\$subcommand" == "review" ]]; then + echo "No issues found." 
+ exit 0 +fi + +echo "unexpected codex args: \$*" >&2 +exit 1 +EOF + chmod +x "$bin_dir/codex" +} + +setup_loop_dir() { + local repo_dir="$1" + local review_started="$2" + local loop_dir="$repo_dir/.humanize/rlcr/2026-03-14_12-00-00" + local current_branch + local base_commit + + current_branch="$(git -C "$repo_dir" rev-parse --abbrev-ref HEAD)" + base_commit="$(git -C "$repo_dir" rev-parse HEAD)" + + mkdir -p "$loop_dir" + cat > "$loop_dir/state.md" < "$loop_dir/goal-tracker.md" <<'EOF' +# Goal Tracker +## IMMUTABLE SECTION +### Ultimate Goal +Test nested codex disable +### Acceptance Criteria +- AC-1: Hook can run + +## MUTABLE SECTION +### Active Tasks +- Verify hook argv +EOF + + cat > "$loop_dir/round-1-summary.md" <<'EOF' +# Round Summary +Implemented initial changes. +EOF + + if [[ "$review_started" == "true" ]]; then + echo "build_finish_round=1" > "$loop_dir/.review-phase-started" + fi +} + +run_loop_hook() { + local repo_dir="$1" + local args_file="$2" + local review_started="$3" + local bin_dir="$TEST_DIR/bin-${review_started}" + + setup_mock_codex "$bin_dir" "$args_file" + setup_loop_dir "$repo_dir" "$review_started" + + set +e + OUTPUT=$(echo '{}' | PATH="$bin_dir:$PATH" CLAUDE_PROJECT_DIR="$repo_dir" bash "$STOP_HOOK" 2>&1) + EXIT_CODE=$? 
+ set -e + + if [[ $EXIT_CODE -ne 0 ]]; then + fail "loop hook completes in $review_started mode" "exit 0" "exit=$EXIT_CODE output=$OUTPUT" + return + fi +} + +REPO_IMPL="$TEST_DIR/repo-impl" +setup_repo "$REPO_IMPL" +run_loop_hook "$REPO_IMPL" "$TEST_DIR/impl.args" "false" + +if grep -q -- '--disable codex_hooks exec' "$TEST_DIR/impl.args"; then + pass "implementation-phase stop hook disables codex_hooks for codex exec" +else + fail "implementation-phase stop hook disables codex_hooks for codex exec" \ + "--disable codex_hooks exec" "$(cat "$TEST_DIR/impl.args" 2>/dev/null || echo missing)" +fi + +REPO_REVIEW="$TEST_DIR/repo-review" +setup_repo "$REPO_REVIEW" +run_loop_hook "$REPO_REVIEW" "$TEST_DIR/review.args" "true" + +if grep -q -- '--disable codex_hooks review' "$TEST_DIR/review.args"; then + pass "review-phase stop hook disables codex_hooks for codex review" +else + fail "review-phase stop hook disables codex_hooks for codex review" \ + "--disable codex_hooks review" "$(cat "$TEST_DIR/review.args" 2>/dev/null || echo missing)" +fi + +if grep -q 'codex "\${CODEX_DISABLE_HOOKS_ARGS\[@\]}" exec' "$PR_STOP_HOOK"; then + pass "PR stop hook disables codex_hooks for nested codex exec" +else + fail "PR stop hook disables codex_hooks for nested codex exec" \ + 'codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec' "not found" +fi + +echo "" +echo "========================================" +echo "Disable Nested Codex Hooks Tests" +echo "========================================" +echo "Passed: $TESTS_PASSED" +echo "Failed: $TESTS_FAILED" + +if [[ $TESTS_FAILED -ne 0 ]]; then + exit 1 +fi diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 96890a41..4eaef4b6 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -57,11 +57,18 @@ setup_mock_codex() { cat > "$TEST_DIR/bin/codex" << EOF #!/bin/bash # Mock codex - outputs the provided content -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == 
"exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Handle codex review command cat << 'REVIEWOUT' $review_output @@ -82,11 +89,18 @@ setup_mock_codex_with_tracking() { #!/bin/bash # Track that codex was called echo "CODEX_WAS_CALLED" > "$TEST_DIR/codex_called.marker" -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then cat << 'REVIEWOUT' $review_output REVIEWOUT @@ -106,11 +120,18 @@ setup_mock_codex_review_failure() { cat > "$TEST_DIR/bin/codex" << EOF #!/bin/bash # Mock codex - fails on review command -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $exec_output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Simulate failure with non-zero exit echo "Error: Codex review failed" >&2 exit $review_exit_code @@ -128,11 +149,18 @@ setup_mock_codex_review_empty_stdout() { cat > "$TEST_DIR/bin/codex" << EOF #!/bin/bash # Mock codex - produces empty stdout on review -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $exec_output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Exit successfully but produce no output exit 0 fi diff --git a/tests/test-task-tag-routing.sh b/tests/test-task-tag-routing.sh index 
ae9365f7..24871e00 100755 --- a/tests/test-task-tag-routing.sh +++ b/tests/test-task-tag-routing.sh @@ -28,14 +28,21 @@ create_mock_codex() { mkdir -p "$bin_dir" cat > "$bin_dir/codex" << MOCK_EOF #!/bin/bash -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'OUT' $exec_output OUT -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then echo "No issues found." else - echo "mock-codex: unsupported command \$1" >&2 + echo "mock-codex: unsupported command \$*" >&2 exit 1 fi MOCK_EOF From dbb88bcadee1713be65529c88534836541408b85 Mon Sep 17 00:00:00 2001 From: Zhou Yaoyang Date: Fri, 27 Mar 2026 00:56:46 +0800 Subject: [PATCH 24/97] fixed tests for bitlesson-select-routing --- tests/test-bitlesson-select-routing.sh | 37 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index 113c92a5..dee42a5f 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -45,6 +45,25 @@ Source Rounds: 0 EOF } +create_real_humanize_bitlesson() { + local dir="$1" + mkdir -p "$dir/.humanize" + cat > "$dir/.humanize/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +## Lesson: Avoid tracker drift +Lesson ID: BL-20260315-tracker-drift +Scope: goal-tracker.md +Problem Description: Tracker diverges from actual task status. +Root Cause: Status rows are not updated after verification. +Solution: Update tracker rows immediately after each verification step. +Constraints: Keep tracker edits minimal. +Validation Evidence: Verified in test fixture. 
+Source Rounds: 0 +EOF +} + # Helper: create a mock codex binary that outputs valid bitlesson-selector format create_mock_codex() { local bin_dir="$1" @@ -121,7 +140,7 @@ echo "--- Test 1: gpt-* model routes to codex ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_codex "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -150,7 +169,7 @@ echo "--- Test 1b: gpt-* codex path passes stdin prompt via trailing '-' ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" STDIN_FILE="$TEST_DIR/codex-stdin.txt" create_recording_mock_codex "$BIN_DIR" "$STDIN_FILE" @@ -185,7 +204,7 @@ echo "--- Test 2: haiku model routes to claude ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -214,7 +233,7 @@ echo "--- Test 3: sonnet model routes to claude ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -243,7 +262,7 @@ echo "--- Test 4: OPUS (uppercase) model routes to claude ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -272,7 +291,7 @@ echo "--- Test 5: Unknown model exits non-zero with error ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "unknown-xyz-model"}' > "$TEST_DIR/.humanize/config.json" @@ -298,7 +317,7 @@ echo "--- Test 6: gpt-* model with missing codex binary exits non-zero ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" 
+create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "gpt-4o"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub claude but NOT codex. @@ -334,7 +353,7 @@ echo "--- Test 7: haiku model falls back to codex when claude binary is missing echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "haiku"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub codex but NOT claude. @@ -409,7 +428,7 @@ stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" bash "$BITLESSON_SELECT" \ --task "Any task" \ --paths "README.md" \ - --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + --bitlesson-file "$TEST_DIR/.humanize/bitlesson.md" 2>/dev/null) || exit_code=$? if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "LESSON_IDS: NONE" && echo "$stdout_out" | grep -q "no recorded lessons"; then pass "Placeholder BitLesson file returns NONE without invoking a model" From 9ed3e1a953bf6761bb97fb21daa8ec5476b83587 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 16:37:58 -0700 Subject: [PATCH 25/97] bump version of humanize --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 7c5e9bc4..588e3b1d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.0" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index ef503495..0f9ab32b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.0", + "version": "1.16.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 1d959f31..dd961c9a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.0** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From 611a437ae3026ab1ec16fbd8f1d97899f35b793d Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:01:18 -0700 Subject: [PATCH 26/97] Fix codex_hooks flag probe, managed-hook regex, and legacy compat - Guard --disable codex_hooks behind a feature probe so older Codex builds that lack this flag do not fail with an unknown-argument error - Widen managed-hook cleanup regex to match any runtime root, not just the hardcoded humanize/hooks/ path, preserving idempotent installs - Make round contract enforcement conditional on drift_status presence so legacy loops without anti-drift fields are not blocked - Fix pre-existing test-stop-hook-legacy-compat Test 1b by adding a .humanize-old directory (global gitignore covers .humanize/ itself) - Add drift_status fields to test-plan-file-hooks fixture for contract enforcement coverage --- hooks/loop-codex-stop-hook.sh | 12 ++++++++++-- hooks/pr-loop-stop-hook.sh | 7 ++++++- scripts/install-codex-hooks.sh | 2 +- tests/test-plan-file-hooks.sh | 3 +++ tests/test-stop-hook-legacy-compat.sh | 5 +++++ 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 87a7f060..5f40a1c9 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -757,7 +757,10 @@ fi # Check Round Contract Exists # ======================================== -if [[ "$IS_FINALIZE_PHASE" != "true" ]]; then +# Only enforce round contract when anti-drift is active (drift_status present in raw state). +# Legacy loops that pre-date the anti-drift feature will not have this field. +RAW_DRIFT_STATUS=$(echo "$RAW_FRONTMATTER" | grep "^drift_status:" || true) +if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ -n "$RAW_DRIFT_STATUS" ]]; then if [[ ! 
-f "$ROUND_CONTRACT_FILE" ]]; then FALLBACK="# Round Contract Missing @@ -1060,7 +1063,12 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above # Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +# Probe whether the installed Codex CLI supports --disable; fall back to empty args +# so older builds do not fail with an unknown-argument error. +CODEX_DISABLE_HOOKS_ARGS=() +if codex --help 2>&1 | grep -q -- '--disable'; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +fi # Build command arguments for summary review (codex exec) CODEX_EXEC_ARGS=("-m" "$CODEX_EXEC_MODEL") diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh index 93eac614..c87abee3 100755 --- a/hooks/pr-loop-stop-hook.sh +++ b/hooks/pr-loop-stop-hook.sh @@ -1335,7 +1335,12 @@ if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_ fi # Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +# Probe whether the installed Codex CLI supports --disable; fall back to empty args +# so older builds do not fail with an unknown-argument error. 
+CODEX_DISABLE_HOOKS_ARGS=() +if codex --help 2>&1 | grep -q -- '--disable'; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +fi CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index b6c32804..665eff46 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -123,7 +123,7 @@ if stop_groups is None: if not isinstance(stop_groups, list): raise SystemExit(f"existing hooks config has invalid Stop array: {hooks_file}") -managed_pattern = re.compile(r"(^|/)humanize/hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)$") +managed_pattern = re.compile(r"(^|/)hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)(\s|$)") filtered_groups = [] for group in stop_groups: diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index b15ca134..ec3eac06 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -130,6 +130,9 @@ plan_tracked: false start_branch: $CURRENT_BRANCH base_branch: $CURRENT_BRANCH review_started: false +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal --- EOF diff --git a/tests/test-stop-hook-legacy-compat.sh b/tests/test-stop-hook-legacy-compat.sh index 3527f75b..7fa1f449 100755 --- a/tests/test-stop-hook-legacy-compat.sh +++ b/tests/test-stop-hook-legacy-compat.sh @@ -163,6 +163,11 @@ echo "Test 1b: Untracked .humanizeconfig still blocks dirty checks" TEST1B_REPO="$TEST_DIR/test1b" create_stop_hook_fixture "$TEST1B_REPO" touch "$TEST1B_REPO/.humanizeconfig" +# Also create a .humanize-old directory to trigger the "Special Case" note. +# The .humanize/ directory itself may be covered by a global gitignore +# so it might not appear as untracked; .humanize-old/ is never globally ignored. +mkdir -p "$TEST1B_REPO/.humanize-old" +echo "legacy" > "$TEST1B_REPO/.humanize-old/legacy.txt" run_stop_hook "$TEST1B_REPO" if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! 
-f "$RUN_MARKER" ]] && \ From 2a0faf6e3cec0db56e10fc126f8e9416d594a173 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:10:55 -0700 Subject: [PATCH 27/97] Fix BSD sed portability, awk field splitting, and strict parser defaults - Replace GNU-only sed /I flag with portable grep -oEi for verdict extraction in extract_mainline_progress_verdict - Fix upsert_state_fields awk to split on first = only using index/substr, preventing silent value truncation on values containing = - Add STATE_PRIVACY_MODE default to parse_state_file_strict for parity with the tolerant parser - Cache codex --disable feature probe per loop to avoid running codex --help on every stop-hook invocation --- hooks/lib/loop-common.sh | 13 +++++++++---- hooks/loop-codex-stop-hook.sh | 10 ++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 797f6e31..a3283a02 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -532,6 +532,7 @@ parse_state_file_strict() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" @@ -585,7 +586,9 @@ extract_mainline_progress_verdict() { return fi - verdict_value=$(printf '%s\n' "$verdict_line" | sed -E 's/.*Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED).*/\1/I') + # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). + # The preceding grep -Ei already ensures the line contains one of the three verdicts. 
+ verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | tail -1) normalize_mainline_progress_verdict "$verdict_value" } @@ -602,9 +605,11 @@ upsert_state_fields() { BEGIN { count = split(assignments, pairs, " "); for (i = 1; i <= count; i++) { - split(pairs[i], kv, "="); - keys[kv[1]] = kv[2]; - order[i] = kv[1]; + eq = index(pairs[i], "="); + key = substr(pairs[i], 1, eq - 1); + val = substr(pairs[i], eq + 1); + keys[key] = val; + order[i] = key; } separator_count = 0; } diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 5f40a1c9..0682ff6e 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1063,11 +1063,17 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above # Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -# Probe whether the installed Codex CLI supports --disable; fall back to empty args +# Probe whether the installed Codex CLI supports --disable; cache the result per loop # so older builds do not fail with an unknown-argument error. 
CODEX_DISABLE_HOOKS_ARGS=() -if codex --help 2>&1 | grep -q -- '--disable'; then +_CODEX_FEATURE_CACHE="$CACHE_DIR/.codex-disable-hooks-supported" +if [[ -f "$_CODEX_FEATURE_CACHE" ]]; then + [[ "$(cat "$_CODEX_FEATURE_CACHE")" == "yes" ]] && CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +elif codex --help 2>&1 | grep -q -- '--disable'; then CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + echo "yes" > "$_CODEX_FEATURE_CACHE" 2>/dev/null +else + echo "no" > "$_CODEX_FEATURE_CACHE" 2>/dev/null fi # Build command arguments for summary review (codex exec) From d122c82591023cda401d29221b71581ed5e9f1c4 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:17:22 -0700 Subject: [PATCH 28/97] Fix path injection in shim generation, JSON escaping, and legacy tracker compat - Escape runtime paths in bitlesson-selector shim using single-quoted strings to prevent command injection via paths containing shell metacharacters - JSON-escape runtime_root before template substitution in install-codex-hooks.sh to prevent JSON corruption from paths containing quotes or backslashes - Allow goal-tracker edits on legacy trackers that lack the IMMUTABLE SECTION header instead of blocking all writes --- hooks/lib/loop-common.sh | 3 ++- scripts/install-codex-hooks.sh | 5 ++++- scripts/install-skill.sh | 17 +++++++++++------ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index a3283a02..30dcbb11 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -888,7 +888,8 @@ goal_tracker_mutable_update_allowed() { current_immutable=$(extract_goal_tracker_immutable_from_file "$tracker_file" 2>/dev/null || true) updated_immutable=$(extract_goal_tracker_immutable_from_text "$updated_content" 2>/dev/null || true) - [[ -n "$current_immutable" ]] || return 1 + # Legacy trackers without IMMUTABLE SECTION: allow edits unconditionally. 
+ [[ -n "$current_immutable" ]] || return 0 [[ "$current_immutable" == "$updated_immutable" ]] } diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index 665eff46..cd920690 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -102,7 +102,10 @@ template_file = pathlib.Path(sys.argv[2]) runtime_root = sys.argv[3] template_text = template_file.read_text(encoding="utf-8") -template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", runtime_root) +# JSON-escape the runtime root so metacharacters (quotes, backslashes) do not +# corrupt the template before json.loads parses it. +escaped_root = json.dumps(runtime_root)[1:-1] # strip outer quotes from dumps output +template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", escaped_root) template = json.loads(template_text) existing = {} diff --git a/scripts/install-skill.sh b/scripts/install-skill.sh index 1e85b743..fa546618 100755 --- a/scripts/install-skill.sh +++ b/scripts/install-skill.sh @@ -343,18 +343,23 @@ install_bitlesson_selector_shim() { mkdir -p "$COMMAND_BIN_DIR" - cat > "$shim_path" < "$shim_path" <> "$shim_path" <> "$shim_path" <> "$shim_path" <<'EOF' From febbc00e46477bfb67dff67e4469ab311535af44 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:46:57 -0700 Subject: [PATCH 29/97] Fix relative path bypass in methodology analysis read guard When realpath is unavailable, the fallback assigned FILE_PATH verbatim which could remain relative. The project-root prefix check only matched absolute paths, allowing relative reads to slip through during the methodology analysis phase. Now normalize relative paths to absolute by prepending PROJECT_ROOT before the guard checks. 
--- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- hooks/loop-read-validator.sh | 17 +++++++++++++++-- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 588e3b1d..8cb47b76 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.16.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 0f9ab32b..5f6a99dd 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.16.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dd961c9a..1c5c88b8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.16.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 5cb32a17..9832b4e7 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -97,8 +97,21 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" - [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_CHECK_DIR" + # Ensure paths are absolute so prefix guards cannot be bypassed + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_CHECK_DIR" == /* ]]; then + _ma_real_loop="$_MA_CHECK_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_CHECK_DIR" + fi + fi if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") # Allowlist: only methodology artifacts (not raw development records). From 17fabf7b22be9d9373517f7dfec42308e9d49656 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:57:44 -0700 Subject: [PATCH 30/97] Revert version back to 1.16.0 on dev branch --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8cb47b76..588e3b1d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 5f6a99dd..0f9ab32b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1", + "version": "1.16.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 1c5c88b8..dd961c9a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.1** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. From 2ab5361b366c4d1b6607815ac04da54ae3df0600 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:05:15 -0700 Subject: [PATCH 31/97] Fix relative path bypass in write validator and add Codex flag probes in bitlesson selector Write validator had the same relative-path fallback issue as the read validator: when realpath is unavailable, relative FILE_PATH bypassed the absolute prefix guard during methodology analysis. Bitlesson selector unconditionally passed --disable codex_hooks, --skip-git-repo-check, and --ephemeral to codex exec without checking if the CLI supports them. Added capability probes matching the pattern already used in loop-codex-stop-hook.sh. 
--- hooks/loop-write-validator.sh | 17 +++++++++++++++-- scripts/bitlesson-select.sh | 17 +++++++++++++---- tests/test-bitlesson-select-routing.sh | 9 +++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 5dabb7ec..8abc2e8c 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -121,8 +121,21 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" - [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + # Ensure paths are absolute so prefix guards cannot be bypassed + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_LOOP_DIR" == /* ]]; then + _ma_real_loop="$_MA_LOOP_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_LOOP_DIR" + fi + fi if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index cd002063..d30ef319 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -185,10 +185,19 @@ run_selector() { local model="$2" if [[ "$provider" == "codex" ]]; then - local codex_exec_args=( - "--disable" "codex_hooks" - "--skip-git-repo-check" - "--ephemeral" + local codex_exec_args=() + # Probe whether the installed Codex CLI supports --disable flag + if codex --help 2>&1 | grep -q -- '--disable'; then + codex_exec_args+=("--disable" "codex_hooks") + fi + # Probe for --skip-git-repo-check and --ephemeral support + if codex exec --help 2>&1 | grep -q -- '--skip-git-repo-check'; then + 
codex_exec_args+=("--skip-git-repo-check") + fi + if codex exec --help 2>&1 | grep -q -- '--ephemeral'; then + codex_exec_args+=("--ephemeral") + fi + codex_exec_args+=( "-s" "read-only" "-m" "$model" "-c" "model_reasoning_effort=low" diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index acf657d2..bd23ab45 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -448,6 +448,15 @@ CAPTURE_BIN="$TEST_DIR/capture-bin" mkdir -p "$CAPTURE_BIN" cat > "$CAPTURE_BIN/codex" <<'EOF' #!/usr/bin/env bash +# Respond to help probes with supported flags +for arg in "$@"; do + if [[ "$arg" == "--help" ]]; then + echo " --disable Disable a feature" + echo " --skip-git-repo-check Skip git repo check" + echo " --ephemeral Ephemeral mode" + exit 0 + fi +done printf '%s\n' "$@" > "${TEST_CAPTURE_ARGS:?}" cat > /dev/null cat <<'OUT' From 4326860a5484202753b2c8822ad1bbe8b1985ad4 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:22:01 -0700 Subject: [PATCH 32/97] Fix mainline verdict parser picking last keyword instead of first When the verdict line contains multiple keywords (e.g. reviewer echoes all options), grep -oEi | tail -1 selected the last match which could be REGRESSED even when ADVANCED was the actual verdict. Changed to head -1 to pick the first keyword after the label. --- hooks/lib/loop-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 30dcbb11..480a7ab0 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -588,7 +588,7 @@ extract_mainline_progress_verdict() { # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). # The preceding grep -Ei already ensures the line contains one of the three verdicts. 
- verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | tail -1) + verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | head -1) normalize_mainline_progress_verdict "$verdict_value" } From 016caca398c5b450db14c928f69b6927a6ac557f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:28:04 -0700 Subject: [PATCH 33/97] Add ask-gemini skill and tool-filtered monitor subcommands Introduce ask-gemini skill that wraps the Gemini CLI in non-interactive mode with built-in web-search instruction, defaulting to gemini-3.1-pro-preview. Supports --gemini-model and --gemini-timeout flags. Add tool metadata tagging (tool: codex / tool: gemini) to both ask-codex and ask-gemini invocations so the monitor can distinguish them. Extend humanize monitor with codex and gemini subcommands that filter skill invocations by tool type. The existing skill subcommand continues to show all invocations. Bump version to 1.17.0. --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 14 +- scripts/ask-codex.sh | 5 + scripts/ask-gemini.sh | 385 ++++++++++++++++++++++++++++++++ scripts/humanize.sh | 16 +- scripts/lib/monitor-skill.sh | 108 +++++++-- skills/ask-gemini/SKILL.md | 61 +++++ 8 files changed, 571 insertions(+), 22 deletions(-) create mode 100755 scripts/ask-gemini.sh create mode 100644 skills/ask-gemini/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 588e3b1d..e4e1adf3 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.17.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 0f9ab32b..bbab412d 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.17.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dd961c9a..0a93dd30 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.17.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. @@ -55,10 +55,18 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ /humanize:start-rlcr-loop docs/plan.md ``` -4. **Monitor progress**: +4. **Consult Gemini** for deep web research (requires Gemini CLI): + ```bash + /humanize:ask-gemini What are the latest best practices for X? + ``` + +5. 
**Monitor progress**: ```bash source /scripts/humanize.sh - humanize monitor rlcr + humanize monitor rlcr # RLCR loop + humanize monitor skill # All skill invocations (codex + gemini) + humanize monitor codex # Codex invocations only + humanize monitor gemini # Gemini invocations only ``` ## Monitor Dashboard diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index ac26fc32..bea40b4e 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -234,6 +234,7 @@ $QUESTION - Effort: $CODEX_EFFORT - Timeout: ${CODEX_TIMEOUT}s - Timestamp: $TIMESTAMP +- Tool: codex EOF # ======================================== @@ -317,6 +318,7 @@ if [[ $CODEX_EXIT_CODE -eq 124 ]]; then # Save metadata even on timeout cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -343,6 +345,7 @@ if [[ $CODEX_EXIT_CODE -ne 0 ]]; then # Save metadata cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -368,6 +371,7 @@ if [[ ! -s "$CODEX_STDOUT_FILE" ]]; then cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -390,6 +394,7 @@ cp "$CODEX_STDOUT_FILE" "$SKILL_DIR/output.md" # Save metadata cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT diff --git a/scripts/ask-gemini.sh b/scripts/ask-gemini.sh new file mode 100755 index 00000000..489bc4db --- /dev/null +++ b/scripts/ask-gemini.sh @@ -0,0 +1,385 @@ +#!/usr/bin/env bash +# +# Ask Gemini - One-shot consultation with Gemini CLI +# +# Sends a question or task to gemini in non-interactive mode and returns +# the response. Gemini is always instructed to leverage Google Search +# for deep web research. +# +# Usage: +# ask-gemini.sh [--gemini-model MODEL] [--gemini-timeout SECONDS] [question...] 
+# +# Output: +# stdout: Gemini's response (for Claude to read) +# stderr: Status/debug info (model, log paths) +# +# Storage: +# Project-local: .humanize/skill//{input,output,metadata}.md +# Cache: ~/.cache/humanize//skill-/gemini-run.{cmd,out,log} +# + +set -euo pipefail + +# ======================================== +# Source Shared Libraries +# ======================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + +# Source portable timeout wrapper +source "$SCRIPT_DIR/portable-timeout.sh" + +# ======================================== +# Default Configuration +# ======================================== + +DEFAULT_GEMINI_MODEL="gemini-3.1-pro-preview" +DEFAULT_ASK_GEMINI_TIMEOUT=3600 + +GEMINI_MODEL="$DEFAULT_GEMINI_MODEL" +GEMINI_TIMEOUT="$DEFAULT_ASK_GEMINI_TIMEOUT" + +# ======================================== +# Help +# ======================================== + +show_help() { + cat << 'HELP_EOF' +ask-gemini - One-shot deep-research consultation with Gemini + +USAGE: + /humanize:ask-gemini [OPTIONS] + +OPTIONS: + --gemini-model + Gemini model name (default: gemini-3.1-pro-preview) + --gemini-timeout + Timeout for the Gemini query in seconds (default: 3600) + -h, --help Show this help message + +DESCRIPTION: + Sends a one-shot question or task to the Gemini CLI in non-interactive + mode (-p). The prompt is augmented with an instruction to perform web + research via Google Search, making this ideal for deep-research tasks + that benefit from up-to-date internet information. + + The response is saved to .humanize/skill//output.md for reference. + +EXAMPLES: + /humanize:ask-gemini What are the latest best practices for Rust error handling? + /humanize:ask-gemini --gemini-model gemini-2.5-pro Review recent CVEs for OpenSSL 3.x + /humanize:ask-gemini --gemini-timeout 600 Compare React Server Components vs Astro Islands + +ENVIRONMENT: + HUMANIZE_GEMINI_YOLO + Set to "true" or "1" to auto-approve all Gemini tool calls (--yolo). 
+ Default behaviour uses --sandbox mode. +HELP_EOF + exit 0 +} + +# ======================================== +# Parse Arguments +# ======================================== + +QUESTION_PARTS=() +OPTIONS_DONE=false + +while [[ $# -gt 0 ]]; do + if [[ "$OPTIONS_DONE" == "true" ]]; then + QUESTION_PARTS+=("$1") + shift + continue + fi + case $1 in + -h|--help) + show_help + ;; + --) + OPTIONS_DONE=true + shift + ;; + --gemini-model) + if [[ -z "${2:-}" ]]; then + echo "Error: --gemini-model requires a MODEL argument" >&2 + exit 1 + fi + GEMINI_MODEL="$2" + shift 2 + ;; + --gemini-timeout) + if [[ -z "${2:-}" ]]; then + echo "Error: --gemini-timeout requires a number argument (seconds)" >&2 + exit 1 + fi + if ! [[ "$2" =~ ^[0-9]+$ ]]; then + echo "Error: --gemini-timeout must be a positive integer (seconds), got: $2" >&2 + exit 1 + fi + GEMINI_TIMEOUT="$2" + shift 2 + ;; + -*) + echo "Error: Unknown option: $1" >&2 + echo "Use --help for usage information" >&2 + exit 1 + ;; + *) + QUESTION_PARTS+=("$1") + OPTIONS_DONE=true + shift + ;; + esac +done + +# Join question parts into a single string +QUESTION="${QUESTION_PARTS[*]}" + +# ======================================== +# Validate Prerequisites +# ======================================== + +if ! command -v gemini &>/dev/null; then + echo "Error: 'gemini' command is not installed or not in PATH" >&2 + echo "" >&2 + echo "Please install Gemini CLI: npm install -g @google/gemini-cli or https://github.com/google-gemini/gemini-cli" >&2 + echo "Then retry: /humanize:ask-gemini " >&2 + exit 1 +fi + +if [[ -z "$QUESTION" ]]; then + echo "Error: No question or task provided" >&2 + echo "" >&2 + echo "Usage: /humanize:ask-gemini [OPTIONS] " >&2 + echo "" >&2 + echo "For help: /humanize:ask-gemini --help" >&2 + exit 1 +fi + +# Validate model name for safety (alphanumeric, hyphen, underscore, dot) +if [[ ! 
"$GEMINI_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "Error: Gemini model contains invalid characters" >&2 + echo " Model: $GEMINI_MODEL" >&2 + echo " Only alphanumeric, hyphen, underscore, dot allowed" >&2 + exit 1 +fi + +# ======================================== +# Detect Project Root +# ======================================== + +if git rev-parse --show-toplevel &>/dev/null; then + PROJECT_ROOT=$(git rev-parse --show-toplevel) +else + PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +fi + +# ======================================== +# Create Storage Directories +# ======================================== + +TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) +UNIQUE_ID="${TIMESTAMP}-$$-$(head -c 4 /dev/urandom | od -An -tx1 | tr -d ' \n')" + +# Project-local storage: .humanize/skill// +SKILL_DIR="$PROJECT_ROOT/.humanize/skill/$UNIQUE_ID" +mkdir -p "$SKILL_DIR" + +# Cache storage: ~/.cache/humanize//skill-/ +SANITIZED_PROJECT_PATH=$(echo "$PROJECT_ROOT" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') +CACHE_BASE="${XDG_CACHE_HOME:-$HOME/.cache}" +CACHE_DIR="$CACHE_BASE/humanize/$SANITIZED_PROJECT_PATH/skill-$UNIQUE_ID" +if ! 
mkdir -p "$CACHE_DIR" 2>/dev/null; then + CACHE_DIR="$SKILL_DIR/cache" + mkdir -p "$CACHE_DIR" + echo "ask-gemini: warning: home cache not writable, using $CACHE_DIR" >&2 +fi + +# ======================================== +# Save Input +# ======================================== + +cat > "$SKILL_DIR/input.md" << EOF +# Ask Gemini Input + +## Question + +$QUESTION + +## Configuration + +- Model: $GEMINI_MODEL +- Timeout: ${GEMINI_TIMEOUT}s +- Timestamp: $TIMESTAMP +- Tool: gemini +EOF + +# ======================================== +# Build Gemini Command +# ======================================== + +GEMINI_ARGS=("-m" "$GEMINI_MODEL") + +# Determine approval mode +if [[ "${HUMANIZE_GEMINI_YOLO:-}" == "true" ]] || [[ "${HUMANIZE_GEMINI_YOLO:-}" == "1" ]]; then + GEMINI_ARGS+=("--yolo") +else + GEMINI_ARGS+=("--sandbox") +fi + +# Use text output format for clean stdout +GEMINI_ARGS+=("-o" "text") + +# Build the augmented prompt with web-search instruction +AUGMENTED_PROMPT="You MUST use Google Search to find the most up-to-date and accurate information before answering. Perform thorough web research. Cite sources where possible. 
+ +--- + +$QUESTION" + +# ======================================== +# Save Debug Command +# ======================================== + +GEMINI_CMD_FILE="$CACHE_DIR/gemini-run.cmd" +GEMINI_STDOUT_FILE="$CACHE_DIR/gemini-run.out" +GEMINI_STDERR_FILE="$CACHE_DIR/gemini-run.log" + +{ + echo "# Gemini ask-gemini invocation debug info" + echo "# Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "# Working directory: $PROJECT_ROOT" + echo "# Timeout: $GEMINI_TIMEOUT seconds" + echo "" + echo "gemini ${GEMINI_ARGS[*]} -p \"\"" + echo "" + echo "# Prompt content:" + echo "$AUGMENTED_PROMPT" +} > "$GEMINI_CMD_FILE" + +# ======================================== +# Run Gemini +# ======================================== + +echo "ask-gemini: model=$GEMINI_MODEL timeout=${GEMINI_TIMEOUT}s" >&2 +echo "ask-gemini: cache=$CACHE_DIR" >&2 +echo "ask-gemini: running gemini -p ..." >&2 + +# Portable epoch-to-ISO8601 formatter +epoch_to_iso() { + local epoch="$1" + date -u -d "@$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || + date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || + echo "unknown" +} + +START_TIME=$(date +%s) + +GEMINI_EXIT_CODE=0 +run_with_timeout "$GEMINI_TIMEOUT" gemini "${GEMINI_ARGS[@]}" -p "$AUGMENTED_PROMPT" \ + > "$GEMINI_STDOUT_FILE" 2> "$GEMINI_STDERR_FILE" || GEMINI_EXIT_CODE=$? 
+ +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) + +echo "ask-gemini: exit_code=$GEMINI_EXIT_CODE duration=${DURATION}s" >&2 + +# ======================================== +# Handle Results +# ======================================== + +if [[ $GEMINI_EXIT_CODE -eq 124 ]]; then + echo "Error: Gemini timed out after ${GEMINI_TIMEOUT} seconds" >&2 + echo "" >&2 + echo "Try increasing the timeout:" >&2 + echo " /humanize:ask-gemini --gemini-timeout $((GEMINI_TIMEOUT * 2)) " >&2 + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 124 +duration: ${DURATION}s +status: timeout +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit 124 +fi + +if [[ $GEMINI_EXIT_CODE -ne 0 ]]; then + echo "Error: Gemini exited with code $GEMINI_EXIT_CODE" >&2 + if [[ -s "$GEMINI_STDERR_FILE" ]]; then + echo "" >&2 + echo "Gemini stderr (last 20 lines):" >&2 + tail -20 "$GEMINI_STDERR_FILE" >&2 + fi + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: $GEMINI_EXIT_CODE +duration: ${DURATION}s +status: error +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit "$GEMINI_EXIT_CODE" +fi + +if [[ ! 
-s "$GEMINI_STDOUT_FILE" ]]; then + echo "Error: Gemini returned empty response" >&2 + if [[ -s "$GEMINI_STDERR_FILE" ]]; then + echo "" >&2 + echo "Gemini stderr (last 20 lines):" >&2 + tail -20 "$GEMINI_STDERR_FILE" >&2 + fi + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 0 +duration: ${DURATION}s +status: empty_response +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit 1 +fi + +# ======================================== +# Save Output and Metadata +# ======================================== + +cp "$GEMINI_STDOUT_FILE" "$SKILL_DIR/output.md" + +cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 0 +duration: ${DURATION}s +status: success +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + +echo "ask-gemini: response saved to $SKILL_DIR/output.md" >&2 + +# ======================================== +# Output Response +# ======================================== + +cat "$GEMINI_STDOUT_FILE" diff --git a/scripts/humanize.sh b/scripts/humanize.sh index a3492844..c5ac3f20 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1182,13 +1182,21 @@ humanize() { skill) _humanize_monitor_skill "$@" ;; + codex) + _humanize_monitor_skill --tool-filter codex "$@" + ;; + gemini) + _humanize_monitor_skill --tool-filter gemini "$@" + ;; *) - echo "Usage: humanize monitor " + echo "Usage: humanize monitor " echo "" echo "Subcommands:" echo " rlcr Monitor the latest RLCR loop log from .humanize/rlcr" echo " pr Monitor the latest PR loop from .humanize/pr-loop" - echo " skill Monitor ask-codex skill invocations from .humanize/skill" + echo " skill Monitor all skill invocations (codex + gemini)" + echo " codex Monitor ask-codex skill invocations only" + echo " gemini Monitor ask-gemini skill invocations only" echo "" echo "Features:" echo " - Fixed status bar showing session info, 
round progress, model config" @@ -1205,7 +1213,9 @@ humanize() { echo "Commands:" echo " monitor rlcr Monitor the latest RLCR loop log" echo " monitor pr Monitor the latest PR loop" - echo " monitor skill Monitor ask-codex skill invocations" + echo " monitor skill Monitor all skill invocations (codex + gemini)" + echo " monitor codex Monitor ask-codex skill invocations only" + echo " monitor gemini Monitor ask-gemini skill invocations only" return 1 ;; esac diff --git a/scripts/lib/monitor-skill.sh b/scripts/lib/monitor-skill.sh index 218fab92..8803f139 100644 --- a/scripts/lib/monitor-skill.sh +++ b/scripts/lib/monitor-skill.sh @@ -3,15 +3,18 @@ # monitor-skill.sh - Skill monitor for humanize # # Provides the _humanize_monitor_skill function for monitoring -# ask-codex skill invocations from .humanize/skill directory. +# skill invocations (ask-codex, ask-gemini) from .humanize/skill directory. # # This file is sourced by humanize.sh and depends on: # - monitor-common.sh (monitor_get_yaml_value, monitor_format_timestamp, etc.) # - humanize.sh (humanize_split_to_array) -# Monitor ask-codex skill invocations from .humanize/skill +# Monitor skill invocations from .humanize/skill # Shows a fixed status bar with aggregate stats and latest invocation details, # with live output display in the scrollable area below. +# +# Accepts --tool-filter to show only invocations from a +# specific tool. Without the filter, all invocations are shown. 
_humanize_monitor_skill() { # Enable 0-indexed arrays in zsh for bash compatibility # no_monitor suppresses background job notifications ([1] PID) @@ -23,11 +26,16 @@ _humanize_monitor_skill() { local check_interval=2 local status_bar_height=9 local once_mode=false + local tool_filter="" # Parse arguments while [[ $# -gt 0 ]]; do case "$1" in --once) once_mode=true; shift ;; + --tool-filter) + tool_filter="${2:-}" + shift 2 + ;; *) shift ;; esac done @@ -35,10 +43,37 @@ _humanize_monitor_skill() { # Check if .humanize/skill exists if [[ ! -d "$skill_dir" ]]; then echo "Error: $skill_dir directory not found in current directory" - echo "Run /humanize:ask-codex first to create skill invocations" + echo "Run /humanize:ask-codex or /humanize:ask-gemini first to create skill invocations" return 1 fi + # Determine the tool for a given invocation directory. + # Reads metadata.md first (completed), falls back to input.md (running). + # Returns: codex, gemini, or unknown + _skill_get_tool() { + local dir="$1" + if [[ -f "$dir/metadata.md" ]]; then + local t=$(monitor_get_yaml_value "tool" "$dir/metadata.md") + [[ -n "$t" ]] && { echo "$t"; return; } + fi + if [[ -f "$dir/input.md" ]]; then + local t=$(grep -E '^- Tool:' "$dir/input.md" 2>/dev/null | sed 's/- Tool: //') + [[ -n "$t" ]] && { echo "$t"; return; } + fi + echo "unknown" + } + + # Check whether a directory passes the current tool filter. + # Returns 0 (pass) or 1 (skip). + _skill_passes_filter() { + [[ -z "$tool_filter" ]] && return 0 + local t=$(_skill_get_tool "$1") + [[ "$t" == "$tool_filter" ]] && return 0 + # Legacy invocations without a tool tag are treated as codex + [[ "$t" == "unknown" && "$tool_filter" == "codex" ]] && return 0 + return 1 + } + # List all valid skill invocation directories sorted newest-first # Skill dirs use YYYY-MM-DD_HH-MM-SS or YYYY-MM-DD_HH-MM-SS-PID-RANDOM naming _skill_list_dirs_sorted() { @@ -47,7 +82,9 @@ _humanize_monitor_skill() { [[ -z "$d" ]] && continue [[ ! 
-d "$d" ]] && continue local name=$(basename "$d") - [[ "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] && dirs+=("$d") + [[ "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] || continue + _skill_passes_filter "$d" || continue + dirs+=("$d") done < <(find "$skill_dir" -mindepth 1 -maxdepth 1 -type d 2>/dev/null) printf '%s\n' "${dirs[@]}" | sort -r } @@ -88,6 +125,7 @@ _humanize_monitor_skill() { [[ ! -d "$d" ]] && continue local name=$(basename "$d") [[ ! "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] && continue + _skill_passes_filter "$d" || continue ((total++)) if [[ -f "$d/metadata.md" ]]; then local st=$(monitor_get_yaml_value "status" "$d/metadata.md") @@ -127,6 +165,7 @@ _humanize_monitor_skill() { # Find the best file to monitor for a skill invocation # Searches both global cache (~/.cache/humanize/), local cache ($dir/cache/), # and project-local files (.humanize/skill/) for the best content. + # Supports both codex (codex-run.*) and gemini (gemini-run.*) cache files. _skill_find_monitored_file() { local dir="$1" local gcache=$(_skill_find_cache_dir "$dir") @@ -134,18 +173,29 @@ _humanize_monitor_skill() { local is_running=false [[ ! -f "$dir/metadata.md" ]] && is_running=true + # Determine which tool produced this invocation for cache file naming + local inv_tool=$(_skill_get_tool "$dir") + local run_prefix="codex-run" + [[ "$inv_tool" == "gemini" ]] && run_prefix="gemini-run" + # Helper: check a cache directory for best file # Args: cache_dir, prefer_log (true for running, false for completed) _check_cache_files() { local c="$1" prefer_log="$2" [[ ! 
-d "$c" ]] && return if [[ "$prefer_log" == "true" ]]; then + [[ -f "$c/${run_prefix}.log" && -s "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + [[ -f "$c/${run_prefix}.out" && -s "$c/${run_prefix}.out" ]] && { echo "$c/${run_prefix}.out"; return; } + [[ -f "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + # Fallback: try the other prefix for legacy/mixed invocations [[ -f "$c/codex-run.log" && -s "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } - [[ -f "$c/codex-run.out" && -s "$c/codex-run.out" ]] && { echo "$c/codex-run.out"; return; } - [[ -f "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } + [[ -f "$c/gemini-run.log" && -s "$c/gemini-run.log" ]] && { echo "$c/gemini-run.log"; return; } else + [[ -f "$c/${run_prefix}.out" && -s "$c/${run_prefix}.out" ]] && { echo "$c/${run_prefix}.out"; return; } + [[ -f "$c/${run_prefix}.log" && -s "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + # Fallback [[ -f "$c/codex-run.out" && -s "$c/codex-run.out" ]] && { echo "$c/codex-run.out"; return; } - [[ -f "$c/codex-run.log" && -s "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } + [[ -f "$c/gemini-run.out" && -s "$c/gemini-run.out" ]] && { echo "$c/gemini-run.out"; return; } fi } @@ -166,6 +216,15 @@ _humanize_monitor_skill() { echo "" } + # Build the monitor title based on filter + _skill_monitor_title() { + case "$tool_filter" in + codex) echo " Humanize Skill Monitor [codex]" ;; + gemini) echo " Humanize Skill Monitor [gemini]" ;; + *) echo " Humanize Skill Monitor" ;; + esac + } + # Draw the status bar at the top _skill_draw_status_bar() { local latest_dir="$1" @@ -186,17 +245,21 @@ _humanize_monitor_skill() { # Parse latest invocation metadata local inv_status="running" model="N/A" effort="N/A" duration="N/A" started_at="N/A" + local inv_tool="unknown" if [[ -n "$latest_dir" && -f "$latest_dir/metadata.md" ]]; then inv_status=$(monitor_get_yaml_value "status" 
"$latest_dir/metadata.md") model=$(monitor_get_yaml_value "model" "$latest_dir/metadata.md") effort=$(monitor_get_yaml_value "effort" "$latest_dir/metadata.md") duration=$(monitor_get_yaml_value "duration" "$latest_dir/metadata.md") started_at=$(monitor_get_yaml_value "started_at" "$latest_dir/metadata.md") + inv_tool=$(monitor_get_yaml_value "tool" "$latest_dir/metadata.md") elif [[ -n "$latest_dir" && -f "$latest_dir/input.md" ]]; then model=$(grep -E '^- Model:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Model: //') effort=$(grep -E '^- Effort:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Effort: //') + inv_tool=$(grep -E '^- Tool:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Tool: //') fi inv_status="${inv_status:-unknown}"; model="${model:-N/A}"; effort="${effort:-N/A}" + inv_tool="${inv_tool:-unknown}" # Status color local status_color="$dim" @@ -235,11 +298,19 @@ _humanize_monitor_skill() { cache_display="...${cache_display: -$csuffix_len}" fi + # Model display: for gemini, no effort; for codex, show (effort) + local model_display="$model" + if [[ "$inv_tool" == "gemini" ]] || [[ "$effort" == "N/A" ]]; then + model_display="$model" + else + model_display="$model ($effort)" + fi + tput sc tput cup 0 0 # Line 1: Title - printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" " Humanize Skill Monitor" + printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" "$(_skill_monitor_title)" # Line 2: Aggregate stats printf "${cyan}Total:${reset} ${bold}${total}${reset} invocations" [[ "$success" -gt 0 ]] && printf " | ${green}${success} success${reset}" @@ -248,8 +319,8 @@ _humanize_monitor_skill() { [[ "$empty" -gt 0 ]] && printf " | ${yellow}${empty} empty${reset}" [[ "$running" -gt 0 ]] && printf " | ${yellow}${running} running${reset}" printf "${clr_eol}\n" - # Line 3: Focused invocation status + model + duration - printf "${magenta}Focused:${reset} ${status_color}%s${reset} | ${yellow}Model:${reset} %s (%s) | ${cyan}Duration:${reset} %s${clr_eol}\n" 
"$inv_status" "$model" "$effort" "${duration:-N/A}" + # Line 3: Focused invocation status + tool + model + duration + printf "${magenta}Focused:${reset} ${status_color}%s${reset} | ${dim}[%s]${reset} ${yellow}Model:${reset} %s | ${cyan}Duration:${reset} %s${clr_eol}\n" "$inv_status" "$inv_tool" "$model_display" "${duration:-N/A}" # Line 4: Started at printf "${cyan}Started:${reset} %s${clr_eol}\n" "$start_display" # Line 5: Question @@ -269,7 +340,9 @@ _humanize_monitor_skill() { if [[ "$once_mode" == "true" ]]; then local latest=$(_skill_find_latest_dir) if [[ -z "$latest" ]]; then - echo "No skill invocations found in $skill_dir" + local filter_msg="" + [[ -n "$tool_filter" ]] && filter_msg=" (filter: $tool_filter)" + echo "No skill invocations found in $skill_dir$filter_msg" return 1 fi @@ -283,24 +356,29 @@ _humanize_monitor_skill() { local -a stats humanize_split_to_array stats "$(_skill_count_stats)" local inv_status="running" model="N/A" effort="N/A" duration="N/A" started_at="N/A" + local inv_tool="unknown" if [[ -f "$focus_dir/metadata.md" ]]; then inv_status=$(monitor_get_yaml_value "status" "$focus_dir/metadata.md") model=$(monitor_get_yaml_value "model" "$focus_dir/metadata.md") effort=$(monitor_get_yaml_value "effort" "$focus_dir/metadata.md") duration=$(monitor_get_yaml_value "duration" "$focus_dir/metadata.md") started_at=$(monitor_get_yaml_value "started_at" "$focus_dir/metadata.md") + inv_tool=$(monitor_get_yaml_value "tool" "$focus_dir/metadata.md") fi + inv_tool="${inv_tool:-unknown}" local question=$(_skill_get_question "$focus_dir") local cache_dir=$(_skill_find_cache_dir "$focus_dir") + local title=$(_skill_monitor_title) echo "==========================================" - echo " Humanize Skill Monitor" + echo "$title" echo "==========================================" echo "" echo "Total Invocations: ${stats[0]}" echo " Success: ${stats[1]} Error: ${stats[2]} Timeout: ${stats[3]} Empty: ${stats[4]} Running: ${stats[5]}" echo "" echo "Focused: 
$(basename "$focus_dir")" + echo " Tool: ${inv_tool}" echo " Status: ${inv_status:-unknown}" echo " Model: ${model:-N/A} (${effort:-N/A})" echo " Duration: ${duration:-N/A}" @@ -329,14 +407,16 @@ _humanize_monitor_skill() { while IFS= read -r d; do [[ -z "$d" ]] && continue local name=$(basename "$d") - local st="running" dur="" + local st="running" dur="" t="?" if [[ -f "$d/metadata.md" ]]; then st=$(monitor_get_yaml_value "status" "$d/metadata.md") dur=$(monitor_get_yaml_value "duration" "$d/metadata.md") + t=$(monitor_get_yaml_value "tool" "$d/metadata.md") fi + t="${t:-?}" local q=$(_skill_get_question "$d") [[ ${#q} -gt 50 ]] && q="${q:0:47}..." - printf " %-38s %-14s %-6s %s\n" "$name" "$st" "$dur" "$q" + printf " %-38s %-7s %-14s %-6s %s\n" "$name" "[$t]" "$st" "$dur" "$q" ((count++)) [[ $count -ge 10 ]] && break done < <(_skill_list_dirs_sorted) diff --git a/skills/ask-gemini/SKILL.md b/skills/ask-gemini/SKILL.md new file mode 100644 index 00000000..e31cdd57 --- /dev/null +++ b/skills/ask-gemini/SKILL.md @@ -0,0 +1,61 @@ +--- +name: ask-gemini +description: Consult Gemini as an independent expert with deep web research. Sends a question or task to Gemini CLI and returns a research-backed response. +argument-hint: "[--gemini-model MODEL] [--gemini-timeout SECONDS] [question or task]" +allowed-tools: "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh:*)" +--- + +# Ask Gemini + +Send a question or task to Gemini and return a research-backed response. +Gemini is always instructed to perform web research via Google Search, +making this ideal for deep-research tasks that benefit from up-to-date +internet information. + +## How to Use + +Do not pass free-form user text to the shell unquoted. The question or task may contain spaces or shell metacharacters such as `(`, `)`, `;`, `#`, `*`, or `[`. 
+ +If the user only supplied a question or task, execute: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" "$ARGUMENTS" +``` + +If the user supplied flags such as `--gemini-model` or `--gemini-timeout`, reconstruct the command so those flags remain separate shell arguments and the remaining free-form question is passed as one quoted final argument. + +Example: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" --gemini-model gemini-2.5-pro "What are the latest Rust async runtime benchmarks?" +``` + +Never run this unsafe form: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" $ARGUMENTS +``` + +because the shell will re-parse the question text and can fail before `ask-gemini.sh` starts. + +## Interpreting Output + +- The script outputs Gemini's response to **stdout** and status info to **stderr** +- Read the stdout output carefully and incorporate Gemini's response into your answer +- Gemini's responses are research-backed with web sources; relay source citations when available +- If the script exits with a non-zero code, report the error to the user + +## Error Handling + +| Exit Code | Meaning | +|-----------|---------| +| 0 | Success - Gemini response is in stdout | +| 1 | Validation error (missing gemini, empty question, invalid flags) | +| 124 | Timeout - suggest using `--gemini-timeout` with a larger value | +| Other | Gemini process error - report the exit code and any stderr output | + +## Notes + +- The response is saved to `.humanize/skill//output.md` for reference +- Default model is `gemini-3.1-pro-preview` with a 3600-second timeout +- Gemini is always instructed to perform Google Search for up-to-date information From 883e3f5bb8106cea4153d9f5e469b2fa7a8d6849 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:29:34 -0700 Subject: [PATCH 34/97] Revert version back to 1.16.0 to match dev branch --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index e4e1adf3..588e3b1d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.17.0" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index bbab412d..0f9ab32b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.17.0", + "version": "1.16.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 0a93dd30..bd7f9145 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.17.0** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. From 916fc5086ead2c89808efa7c06d390773d7a213e Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:32:10 -0700 Subject: [PATCH 35/97] Reject path traversal segments in methodology analysis fallback guards When realpath is unavailable, paths containing ".." could bypass the prefix-based allowlist check by matching the loop directory prefix as a raw string while resolving to a location outside it. Now reject any path with ".." segments in the fallback code path (fail closed). 
--- hooks/loop-read-validator.sh | 10 +++++++++- hooks/loop-write-validator.sh | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 9832b4e7..3188c1a4 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -97,8 +97,16 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - # Ensure paths are absolute so prefix guards cannot be bypassed + # Ensure paths are absolute so prefix guards cannot be bypassed. + # Reject paths with ".." segments to prevent traversal bypasses + # when we cannot canonicalize (fail closed). if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Read Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." >&2 + exit 2 + fi if [[ "$FILE_PATH" == /* ]]; then _ma_real_path="$FILE_PATH" else diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 8abc2e8c..b7314ed9 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -121,8 +121,16 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - # Ensure paths are absolute so prefix guards cannot be bypassed + # Ensure paths are absolute so prefix guards cannot be bypassed. + # Reject paths with ".." segments to prevent traversal bypasses + # when we cannot canonicalize (fail closed). if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Write Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." 
>&2 + exit 2 + fi if [[ "$FILE_PATH" == /* ]]; then _ma_real_path="$FILE_PATH" else From 95a6b7b3ee48d5928758bf738e3e689f83e333d2 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:43:04 -0700 Subject: [PATCH 36/97] Fix edit validator path fallback and reject whitespace-only completion markers Edit validator had the same relative-path and traversal fallback issue as the read/write validators. Applied the same normalization and ".." rejection pattern. Methodology completion checker now trims whitespace before emptiness validation, preventing whitespace-only done markers or report files from passing the content gate. --- hooks/lib/methodology-analysis.sh | 3 +++ hooks/loop-edit-validator.sh | 23 +++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index fb654b68..a95e81af 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -122,6 +122,8 @@ complete_methodology_analysis() { local done_content done_content=$(cat "$done_file" 2>/dev/null || echo "") + # Trim whitespace to reject whitespace-only markers + done_content="${done_content#"${done_content%%[![:space:]]*}"}" if [[ -z "$done_content" ]]; then return 1 fi @@ -134,6 +136,7 @@ complete_methodology_analysis() { fi local report_content report_content=$(cat "$report_file" 2>/dev/null || echo "") + report_content="${report_content#"${report_content%%[![:space:]]*}"}" if [[ -z "$report_content" ]]; then echo "Warning: methodology-analysis-report.md is empty, blocking completion" >&2 return 1 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 64e43795..32ba3a9d 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -104,8 +104,27 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") # Fallback to raw paths when 
realpath is unavailable (older macOS/BSD) - [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" - [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + # Ensure paths are absolute and reject ".." to prevent traversal bypasses. + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Edit Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." >&2 + exit 2 + fi + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_LOOP_DIR" == /* ]]; then + _ma_real_loop="$_MA_LOOP_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_LOOP_DIR" + fi + fi if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in From bd506f2233d1c535f6204fdbe0fe916334b057c6 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:52:54 -0700 Subject: [PATCH 37/97] Reject ambiguous multi-keyword verdict lines and fix Gemini CLI install path Mainline verdict parser now rejects lines containing multiple verdict keywords (e.g. template placeholders listing all options) as unknown, preventing silent false-positive ADVANCED readings that reset drift counters. Fixed Gemini CLI install guidance to point to the correct package (@google/gemini-cli) and repository (google-gemini/gemini-cli). --- hooks/lib/loop-common.sh | 12 +++++++++++- scripts/ask-gemini.sh | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 480a7ab0..f349931a 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -588,7 +588,17 @@ extract_mainline_progress_verdict() { # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). # The preceding grep -Ei already ensures the line contains one of the three verdicts. 
- verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | head -1) + # Reject lines with multiple verdict keywords (e.g. placeholder template formats) + # to avoid silently accepting an ambiguous verdict. + local _verdict_matches + _verdict_matches=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED') + local _match_count + _match_count=$(printf '%s\n' "$_verdict_matches" | wc -l) + if [[ "$_match_count" -gt 1 ]]; then + echo "$MAINLINE_VERDICT_UNKNOWN" + return + fi + verdict_value=$(printf '%s\n' "$_verdict_matches" | head -1) normalize_mainline_progress_verdict "$verdict_value" } diff --git a/scripts/ask-gemini.sh b/scripts/ask-gemini.sh index 489bc4db..9f59b804 100755 --- a/scripts/ask-gemini.sh +++ b/scripts/ask-gemini.sh @@ -142,7 +142,7 @@ QUESTION="${QUESTION_PARTS[*]}" if ! command -v gemini &>/dev/null; then echo "Error: 'gemini' command is not installed or not in PATH" >&2 echo "" >&2 - echo "Please install Gemini CLI: npm install -g @anthropic-ai/gemini-cli or https://github.com/anthropics/gemini-cli" >&2 + echo "Please install Gemini CLI: npm install -g @google/gemini-cli or https://github.com/google-gemini/gemini-cli" >&2 echo "Then retry: /humanize:ask-gemini " >&2 exit 1 fi From 4561bc43ecd27304ebfdc50f18d8477af147cbc5 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 19:23:46 -0700 Subject: [PATCH 38/97] Shell-quote hook command paths and reject ambiguous verdict lines Hook installer now shell-quotes command paths via shlex.quote() so runtime roots containing spaces do not split the command at execution. Mainline verdict parser rejects lines with multiple verdict keywords as unknown to prevent template placeholders from being parsed as a valid verdict. Corrected Gemini CLI install guidance to the actual package and repo. 
--- scripts/install-codex-hooks.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index cd920690..c7907bf3 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -95,6 +95,7 @@ merge_hooks_json() { import json import pathlib import re +import shlex import sys hooks_file = pathlib.Path(sys.argv[1]) @@ -108,6 +109,14 @@ escaped_root = json.dumps(runtime_root)[1:-1] # strip outer quotes from dumps o template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", escaped_root) template = json.loads(template_text) +# Shell-quote command paths so spaces in runtime_root do not split the command +for group_list in template.get("hooks", {}).values(): + for group in group_list: + if isinstance(group, dict): + for hook in group.get("hooks", []): + if isinstance(hook, dict) and "command" in hook: + hook["command"] = shlex.quote(hook["command"]) + existing = {} if hooks_file.exists(): with hooks_file.open("r", encoding="utf-8") as fh: From 338b4dd9e12c1b2f1c12e6dbf107dc1c5922d2c0 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 20:21:25 -0700 Subject: [PATCH 39/97] Remove PR loop feature entirely The PR loop workflow is superseded by the /loop command combined with GitHub PR review polling. This removes all PR loop implementation, tests, documentation, templates, and supporting scripts. Deleted PR-loop-only files: commands (start/cancel-pr-loop), hooks (pr-loop-stop-hook), scripts (setup/cancel-pr-loop, check-bot-reactions, check-pr-reviewer-status, fetch-pr-comments, poll-pr-reviews), prompt templates (pr-loop/), test fixtures (setup-fixture-mock-gh), and all PR loop test files. Cleaned PR loop references from mixed files: hook validators, loop library, template loader, bash validator, humanize.sh monitor, monitor-common.sh, codex hooks config, install script, RLCR setup mutual exclusion, SKILL.md, usage docs, and test harnesses. 
The install script retains pr-loop-stop-hook.sh in its managed pattern so upgrading users get stale hooks cleaned from their hooks.json. --- .gitignore | 1 + commands/cancel-pr-loop.md | 25 - commands/start-pr-loop.md | 61 - config/codex-hooks.json | 8 +- docs/install-for-claude.md | 1 - docs/install-for-codex.md | 2 +- docs/usage.md | 42 +- hooks/hooks.json | 7 +- hooks/lib/loop-common.sh | 364 +--- hooks/lib/template-loader.sh | 2 +- hooks/loop-bash-validator.sh | 62 +- hooks/loop-edit-validator.sh | 30 +- hooks/loop-read-validator.sh | 5 +- hooks/loop-write-validator.sh | 30 +- hooks/pr-loop-stop-hook.sh | 1654 -------------- prompt-template/block/force-push-detected.md | 17 - prompt-template/block/no-trigger-comment.md | 17 - prompt-template/block/pr-loop-prompt-write.md | 9 - .../block/pr-loop-state-modification.md | 12 - .../pr-loop/codex-goal-tracker-update.md | 64 - .../critical-requirements-has-comments.md | 24 - .../critical-requirements-no-comments.md | 21 - .../pr-loop/goal-tracker-initial.md | 33 - prompt-template/pr-loop/round-0-header.md | 15 - .../pr-loop/round-0-task-has-comments.md | 43 - .../pr-loop/round-0-task-no-comments.md | 30 - scripts/cancel-pr-loop.sh | 132 -- scripts/check-bot-reactions.sh | 308 --- scripts/check-pr-reviewer-status.sh | 275 --- scripts/fetch-pr-comments.sh | 452 ---- scripts/humanize.sh | 443 +--- scripts/lib/monitor-common.sh | 165 +- scripts/poll-pr-reviews.sh | 328 --- scripts/setup-pr-loop.sh | 945 -------- scripts/setup-rlcr-loop.sh | 15 +- skills/humanize/SKILL.md | 47 +- tests/mocks/gh | 2 +- .../test-concurrent-state-robustness.sh | 55 - .../robustness/test-hook-system-robustness.sh | 17 - tests/robustness/test-pr-loop-api-fetch.sh | 19 - tests/robustness/test-pr-loop-api-poll.sh | 18 - .../robustness/test-pr-loop-api-robustness.sh | 866 -------- .../test-setup-scripts-robustness.sh | 148 +- tests/run-all-tests.sh | 6 - tests/setup-fixture-mock-gh.sh | 101 - tests/setup-monitor-test-env.sh | 67 +- 
tests/test-codex-hook-install.sh | 16 +- tests/test-disable-nested-codex-hooks.sh | 8 - tests/test-monitor-e2e-deletion.sh | 1 - tests/test-monitor-e2e-real.sh | 314 --- tests/test-monitor-e2e-sigint.sh | 1 - tests/test-pr-loop-1-scripts.sh | 24 - tests/test-pr-loop-2-hooks.sh | 24 - tests/test-pr-loop-3-stophook.sh | 31 - tests/test-pr-loop-hooks.sh | 1623 -------------- tests/test-pr-loop-lib.sh | 145 -- tests/test-pr-loop-scripts.sh | 410 ---- tests/test-pr-loop-stophook.sh | 1782 --------------- tests/test-pr-loop-system.sh | 1904 ----------------- tests/test-pr-loop.sh | 53 - tests/test-unified-codex-config.sh | 191 -- 61 files changed, 32 insertions(+), 13483 deletions(-) delete mode 100644 commands/cancel-pr-loop.md delete mode 100644 commands/start-pr-loop.md delete mode 100755 hooks/pr-loop-stop-hook.sh delete mode 100644 prompt-template/block/force-push-detected.md delete mode 100644 prompt-template/block/no-trigger-comment.md delete mode 100644 prompt-template/block/pr-loop-prompt-write.md delete mode 100644 prompt-template/block/pr-loop-state-modification.md delete mode 100644 prompt-template/pr-loop/codex-goal-tracker-update.md delete mode 100644 prompt-template/pr-loop/critical-requirements-has-comments.md delete mode 100644 prompt-template/pr-loop/critical-requirements-no-comments.md delete mode 100644 prompt-template/pr-loop/goal-tracker-initial.md delete mode 100644 prompt-template/pr-loop/round-0-header.md delete mode 100644 prompt-template/pr-loop/round-0-task-has-comments.md delete mode 100644 prompt-template/pr-loop/round-0-task-no-comments.md delete mode 100755 scripts/cancel-pr-loop.sh delete mode 100755 scripts/check-bot-reactions.sh delete mode 100755 scripts/check-pr-reviewer-status.sh delete mode 100755 scripts/fetch-pr-comments.sh delete mode 100755 scripts/poll-pr-reviews.sh delete mode 100755 scripts/setup-pr-loop.sh delete mode 100755 tests/robustness/test-pr-loop-api-fetch.sh delete mode 100755 
tests/robustness/test-pr-loop-api-poll.sh delete mode 100755 tests/robustness/test-pr-loop-api-robustness.sh delete mode 100755 tests/setup-fixture-mock-gh.sh delete mode 100755 tests/test-pr-loop-1-scripts.sh delete mode 100755 tests/test-pr-loop-2-hooks.sh delete mode 100755 tests/test-pr-loop-3-stophook.sh delete mode 100644 tests/test-pr-loop-hooks.sh delete mode 100644 tests/test-pr-loop-lib.sh delete mode 100644 tests/test-pr-loop-scripts.sh delete mode 100644 tests/test-pr-loop-stophook.sh delete mode 100755 tests/test-pr-loop-system.sh delete mode 100755 tests/test-pr-loop.sh diff --git a/.gitignore b/.gitignore index 2308545e..e5bcf34c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ temp # Local Claude client settings /.claude/settings.json +/.claude/scheduled_tasks.lock # Humanize state directories (runtime-generated, project-local) .humanize/ diff --git a/commands/cancel-pr-loop.md b/commands/cancel-pr-loop.md deleted file mode 100644 index 9f7b5bd4..00000000 --- a/commands/cancel-pr-loop.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -description: "Cancel active PR loop" -allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh)", "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh --force)"] -disable-model-invocation: true ---- - -# Cancel PR Loop - -To cancel the active PR loop: - -1. Run the cancel script: - -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh" -``` - -2. Check the first line of output: - - **NO_LOOP** or **NO_ACTIVE_LOOP**: Say "No active PR loop found." - - **CANCELLED**: Report the cancellation message from the output - -**Key principle**: The script handles all cancellation logic. A PR loop is active if `state.md` exists in the newest PR loop directory (.humanize/pr-loop/). - -The loop directory with comments, resolution summaries, and state information will be preserved for reference. - -**Note**: This command only affects PR loops. RLCR loops (.humanize/rlcr/) are not affected. 
Use `/humanize:cancel-rlcr-loop` to cancel RLCR loops. diff --git a/commands/start-pr-loop.md b/commands/start-pr-loop.md deleted file mode 100644 index aebdf207..00000000 --- a/commands/start-pr-loop.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -description: "Start PR review loop with bot monitoring" -argument-hint: "--claude|--codex [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS]" -allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-pr-loop.sh:*)"] ---- - -# Start PR Loop - -Execute the setup script to initialize the PR review loop: - -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/setup-pr-loop.sh" $ARGUMENTS -``` - -This command starts a PR review loop that: - -1. Detects the PR associated with the current branch -2. Fetches review comments from the specified bot(s) -3. You analyze and fix issues identified by the bot(s) -4. Push changes and trigger re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout) -6. Local Codex validates if remote concerns are valid or approved - -## Bot Flags (Required) - -At least one bot flag is required: -- `--claude` - Monitor reviews from claude[bot] (trigger with @claude) -- `--codex` - Monitor reviews from chatgpt-codex-connector[bot] (trigger with @codex) - -## Comment Prioritization - -Comments are processed in this order: -1. **Human comments first** - They always take precedence over bots -2. **Bot comments** - Newest comments analyzed first - -## Workflow - -1. Analyze PR comments and fix issues -2. Commit and push changes -3. Comment on PR to trigger re-review using the bot mentions shown in the prompt -4. Write resolution summary to the specified file -5. Try to exit - Stop Hook intercepts and polls for bot reviews -6. If issues remain, receive feedback and continue -7. If all bots approve, loop ends - -**Note:** The setup script provides the exact mention string to use (e.g., `@claude @codex`). 
-Use whatever bot mentions are shown in the initial prompt - they match the flags you provided. - -## Important Rules - -1. **Write summaries**: Always write your resolution summary to the specified file before exiting -2. **Push changes**: Your fixes must be pushed for bots to review them -3. **Tag bots**: Use the correct @mention format to trigger bot reviews -4. **No cheating**: Do not try to exit the loop by editing state files or running cancel commands -5. **Trust the process**: The Stop Hook manages polling and Codex validation - -## Stopping the Loop - -- Reach the maximum iteration count -- All monitored bots approve the changes -- User runs `/humanize:cancel-pr-loop` diff --git a/config/codex-hooks.json b/config/codex-hooks.json index 7a04402a..98d31c33 100644 --- a/config/codex-hooks.json +++ b/config/codex-hooks.json @@ -1,5 +1,5 @@ { - "description": "Humanize Codex Hooks - Native Stop hooks for RLCR and PR loops", + "description": "Humanize Codex Hooks - Native Stop hooks for RLCR loops", "hooks": { "Stop": [ { @@ -9,12 +9,6 @@ "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/loop-codex-stop-hook.sh", "timeout": 7200, "statusMessage": "humanize RLCR stop hook" - }, - { - "type": "command", - "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/pr-loop-stop-hook.sh", - "timeout": 7200, - "statusMessage": "humanize PR stop hook" } ] } diff --git a/docs/install-for-claude.md b/docs/install-for-claude.md index 78481c99..5af9b4ca 100644 --- a/docs/install-for-claude.md +++ b/docs/install-for-claude.md @@ -68,7 +68,6 @@ Then use: ```bash humanize monitor rlcr # Monitor RLCR loop -humanize monitor pr # Monitor PR loop ``` ## Other Install Guides diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index 8698d001..8a9aecd0 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -76,7 +76,7 @@ sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" Expected: - `codex_hooks` is `true` -- `hooks.json` contains `loop-codex-stop-hook.sh` and 
`pr-loop-stop-hook.sh` +- `hooks.json` contains `loop-codex-stop-hook.sh` - `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.4` - for `--target codex`, `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` also contains `provider_mode: "codex-only"` diff --git a/docs/usage.md b/docs/usage.md index b5625bec..b7e9738a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -63,8 +63,6 @@ The quiz is advisory, not a gate. You always have the option to proceed. But tha | `/cancel-rlcr-loop` | Cancel active loop | | `/gen-plan --input --output ` | Generate structured plan from draft | | `/refine-plan --input ` | Refine an annotated plan and generate a QA ledger | -| `/start-pr-loop --claude\|--codex` | Start PR review loop with bot monitoring | -| `/cancel-pr-loop` | Cancel active PR loop | | `/ask-codex [question]` | One-shot consultation with Codex | ## Command Reference @@ -209,39 +207,6 @@ If `--alt-language` is set to a supported non-English language, the command also translated plan and QA variants by inserting `_` before the file extension, such as `plan_zh.md` and `plan-qa_zh.md`. -### start-pr-loop - -``` -/humanize:start-pr-loop --claude|--codex [OPTIONS] - -BOT FLAGS (at least one required): - --claude Monitor reviews from claude[bot] (trigger with @claude) - --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger with @codex) - -OPTIONS: - --max Maximum iterations before auto-stop (default: 42) - --codex-model - Codex model and reasoning effort (default from config, effort: medium) - --codex-timeout - Timeout for each Codex review in seconds (default: 900) - -h, --help Show help message -``` - -The PR loop automates the process of handling GitHub PR reviews from remote bots: - -1. Detects the PR associated with the current branch -2. Fetches review comments from the specified bot(s) -3. Claude analyzes and fixes issues identified by the bot(s) -4. 
Pushes changes and triggers re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout per bot) -6. Local Codex validates if remote concerns are approved or have issues -7. Loop continues until all bots approve or max iterations reached - -**Prerequisites:** -- GitHub CLI (`gh`) must be installed and authenticated -- Codex CLI must be installed -- Current branch must have an associated open PR - ### ask-codex ``` @@ -284,7 +249,7 @@ Current built-in keys: ### Codex Model Configuration -All Codex-using features (RLCR loop, PR loop, ask-codex) share the same model configuration: +All Codex-using features (RLCR loop, ask-codex) share the same model configuration: | Key | Default | Description | |-----|---------|-------------| @@ -307,7 +272,7 @@ are unset, so BitLesson selection stays on the Codex/OpenAI path without probing Codex model is resolved with this precedence: 1. CLI `--codex-model` flag (highest priority) -2. Feature-specific defaults (e.g., PR loop defaults to `medium` effort) +2. Feature-specific defaults 3. Config-backed defaults from the 4-layer hierarchy above 4. Hardcoded fallback (`gpt-5.4:high`) @@ -327,8 +292,6 @@ source ~/.claude/plugins/cache/humania/humanize//scripts/humaniz # Monitor RLCR loop progress humanize monitor rlcr -# Monitor PR loop progress -humanize monitor pr ``` Progress data is stored in `.humanize/rlcr//` for each loop session. @@ -336,7 +299,6 @@ Progress data is stored in `.humanize/rlcr//` for each loop session. 
## Cancellation - **RLCR loop**: `/humanize:cancel-rlcr-loop` -- **PR loop**: `/humanize:cancel-pr-loop` ## Environment Variables diff --git a/hooks/hooks.json b/hooks/hooks.json index e25ebe30..dcb99266 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -1,5 +1,5 @@ { - "description": "Humanize Plugin Hooks - Validation hooks and Stop hooks for /start-rlcr-loop and /start-pr-loop", + "description": "Humanize Plugin Hooks - Validation hooks and Stop hooks for /start-rlcr-loop", "hooks": { "UserPromptSubmit": [ { @@ -67,11 +67,6 @@ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/loop-codex-stop-hook.sh", "timeout": 7200 - }, - { - "type": "command", - "command": "${CLAUDE_PLUGIN_ROOT}/hooks/pr-loop-stop-hook.sh", - "timeout": 7200 } ] } diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index f349931a..2425449b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -193,8 +193,8 @@ DEFAULT_BITLESSON_MODEL="$(get_config_value "$_LOOP_COMMON_CONFIG" "bitlesson_mo DEFAULT_BITLESSON_MODEL="${DEFAULT_BITLESSON_MODEL:-haiku}" # Load codex model/effort from merged config so .humanize/config.json can set persistent -# defaults for all Codex-using features (RLCR, PR loop, ask-codex). -# Precedence: pre-set by caller (e.g. PR loop) > config value > hardcoded fallback (gpt-5.4/high) +# defaults for all Codex-using features (RLCR, ask-codex). +# Precedence: pre-set by caller > config value > hardcoded fallback (gpt-5.4/high) _cfg_codex_model="$(get_config_value "$_LOOP_COMMON_CONFIG" "codex_model" 2>/dev/null || true)" if [[ -n "$_cfg_codex_model" && ! 
"$_cfg_codex_model" =~ ^[a-zA-Z0-9._-]+$ ]]; then echo "Warning: Invalid codex_model in merged config: $_cfg_codex_model" >&2 @@ -1159,196 +1159,6 @@ is_in_humanize_loop_dir() { echo "$path" | grep -q '\.humanize/rlcr/' } -# ======================================== -# PR Loop Bot Name Mapping -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -# -# Usage: author=$(map_bot_to_author "codex") -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# Reverse mapping: author name to bot name -# - chatgpt-codex-connector[bot] -> codex -# - chatgpt-codex-connector -> codex -# - claude[bot] -> claude -# -# Usage: bot=$(map_author_to_bot "chatgpt-codex-connector[bot]") -map_author_to_bot() { - local author="$1" - # Remove [bot] suffix if present - local author_clean="${author%\[bot\]}" - case "$author_clean" in - chatgpt-codex-connector) echo "codex" ;; - *) echo "$author_clean" ;; - esac -} - -# Build a YAML list string from an array of values -# Returns multiline string with " - value" for each item -# -# Usage: yaml_list=$(build_yaml_list "${array[@]}") -build_yaml_list() { - local result="" - for item in "$@"; do - result="${result} - - ${item}" - done - echo "$result" -} - -# Build a mention string from bot names (e.g., "@claude @codex") -# -# Usage: mentions=$(build_bot_mention_string "${bots[@]}") -build_bot_mention_string() { - local result="" - for bot in "$@"; do - if [[ -n "$result" ]]; then - result="${result} @${bot}" - else - result="@${bot}" - fi - done - echo "$result" -} - -# ======================================== -# PR Loop Directory Functions -# ======================================== - -# Check if a path is inside .humanize/pr-loop directory -is_in_pr_loop_dir() { - local path="$1" - echo "$path" | grep -q '\.humanize/pr-loop/' -} - -# Check if a path 
is inside any loop directory (RLCR or PR loop) -is_in_any_loop_dir() { - local path="$1" - is_in_humanize_loop_dir "$path" || is_in_pr_loop_dir "$path" -} - -# Find the most recent active PR loop directory with state.md -# Similar to find_active_loop but for PR loops -# Outputs the directory path to stdout, or empty string if none found -find_active_pr_loop() { - local loop_base_dir="$1" - - if [[ ! -d "$loop_base_dir" ]]; then - echo "" - return - fi - - local newest_dir - newest_dir=$(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r | head -1) - - if [[ -n "$newest_dir" && -f "${newest_dir}state.md" ]]; then - echo "${newest_dir%/}" - else - echo "" - fi -} - -# Check if a path (lowercase) matches a PR loop round file pattern -# Types: pr-comment, pr-resolve, pr-check, pr-feedback, prompt, codex-prompt -is_pr_round_file_type() { - local path_lower="$1" - local file_type="$2" - - echo "$path_lower" | grep -qE "round-[0-9]+-${file_type}\\.md\$" -} - -# Check if a path matches any PR loop read-only file type -# These files are generated by the system and should not be modified by Claude -is_pr_loop_readonly_file() { - local path_lower="$1" - - is_pr_round_file_type "$path_lower" "pr-comment" || \ - is_pr_round_file_type "$path_lower" "prompt" || \ - is_pr_round_file_type "$path_lower" "codex-prompt" || \ - is_pr_round_file_type "$path_lower" "pr-check" || \ - is_pr_round_file_type "$path_lower" "pr-feedback" -} - -# Validate PR loop pr-resolve file round number -# Returns 0 if valid (correct round or no active loop), exits with error message if wrong round -# Usage: validate_pr_resolve_round "$file_path_lower" "$action_verb" -# Arguments: -# $1 - File path (lowercase) -# $2 - Action verb for error message ("edit" or "write to") -validate_pr_resolve_round() { - local file_path_lower="$1" - local action_verb="$2" - - local project_root="${CLAUDE_PROJECT_DIR:-$(pwd)}" - local pr_loop_base_dir="$project_root/.humanize/pr-loop" - local active_pr_loop_dir - 
active_pr_loop_dir=$(find_active_pr_loop "$pr_loop_base_dir") - - if [[ -z "$active_pr_loop_dir" ]]; then - return 0 - fi - - local pr_state_file="$active_pr_loop_dir/state.md" - if [[ ! -f "$pr_state_file" ]]; then - return 0 - fi - - local pr_current_round - pr_current_round=$(sed -n '/^---$/,/^---$/{ /^current_round:/{ s/current_round: *//; p; } }' "$pr_state_file" | tr -d ' ') - pr_current_round="${pr_current_round:-0}" - - local claude_pr_round - claude_pr_round=$(echo "$file_path_lower" | sed -n 's|.*round-\([0-9]*\)-pr-resolve\.md$|\1|p') - - if [[ -n "$claude_pr_round" ]] && [[ "$claude_pr_round" != "$pr_current_round" ]]; then - local correct_path="$active_pr_loop_dir/round-${pr_current_round}-pr-resolve.md" - # NOTE: Avoid ${var^} (Bash 4+ only) for macOS Bash 3.2 compatibility - # Use tr for portable capitalization of first letter - local action_verb_cap - action_verb_cap=$(echo "$action_verb" | sed 's/^\(.\)/\U\1/') - # Fallback for systems where \U doesn't work (use awk instead) - if [[ "$action_verb_cap" == "$action_verb" ]] || [[ "$action_verb_cap" == *'U'* ]]; then - action_verb_cap=$(echo "$action_verb" | awk '{print toupper(substr($0,1,1)) tolower(substr($0,2))}') - fi - echo "# Wrong Round Number" >&2 - echo "" >&2 - echo "You tried to $action_verb round-${claude_pr_round}-pr-resolve.md but current PR loop round is **${pr_current_round}**." >&2 - echo "" >&2 - echo "$action_verb_cap: \`$correct_path\`" >&2 - return 2 - fi - - return 0 -} - -# Standard message for blocking PR loop state file modifications -pr_loop_state_blocked_message() { - local fallback="# PR Loop State File Modification Blocked - -You cannot modify state.md in .humanize/pr-loop/. This file is managed by the PR loop system." 
- - load_and_render_safe "$TEMPLATE_DIR" "block/pr-loop-state-modification.md" "$fallback" -} - -# Standard message for blocking PR loop prompt/comment file writes -pr_loop_prompt_blocked_message() { - local fallback="# PR Loop File Write Blocked - -You cannot write to round-*-pr-comment.md or round-*-prompt.md files in .humanize/pr-loop/. -These files are generated by the PR loop system and are read-only." - - load_and_render_safe "$TEMPLATE_DIR" "block/pr-loop-prompt-write.md" "$fallback" -} - # Check if a git add command would add .humanize files to version control # Usage: git_adds_humanize "$command_lower" # Returns 0 if the command would add .humanize files, 1 otherwise @@ -1608,173 +1418,3 @@ end_loop() { return 1 fi } - -# ======================================== -# PR Loop Goal Tracker Functions -# ======================================== - -# Update the PR goal tracker after Codex analysis -# Usage: update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$ROUND" "$BOT_RESULTS_JSON" -# -# Arguments: -# $1 - Path to goal-tracker.md -# $2 - Current round number -# $3 - JSON containing per-bot analysis results (optional) -# Format: {"bot": "name", "issues": N, "resolved": N} -# -# Updates: -# - Issue Summary table with new row -# - Total Statistics section -# - Issue Log with round entry -# -# Note: This is a helper function for the stop hook. The primary update -# mechanism is through Codex prompt instructions, but this ensures -# consistency when Codex doesn't update correctly. -update_pr_goal_tracker() { - local tracker_file="$1" - local round="$2" - local bot_results="${3:-}" - - if [[ ! 
-f "$tracker_file" ]]; then - echo "Warning: Goal tracker not found: $tracker_file" >&2 - return 1 - fi - - # Extract reviewer early for idempotency check (need to check round+reviewer combo) - local reviewer="Codex" - if [[ -n "$bot_results" && "$bot_results" != "null" ]]; then - reviewer=$(echo "$bot_results" | jq -r '.bot // "Codex"' 2>/dev/null || echo "Codex") - fi - - # IDEMPOTENCY CHECK: Check for BOTH round AND reviewer to support multi-bot rounds - # This allows multiple bots to add their own rows for the same round - local has_summary_row=false - local has_log_entry=false - - # Check if this specific round+reviewer combo already exists in Issue Summary - # Table format: | Round | Reviewer | Issues Found | Issues Resolved | Status | - if grep -qE "^\|[[:space:]]*${round}[[:space:]]*\|[[:space:]]*${reviewer}[[:space:]]*\|" "$tracker_file" 2>/dev/null; then - has_summary_row=true - fi - - # Check if this specific round+reviewer combo already exists in Issue Log - # Log format: "### Round N" followed by "Reviewer: ..." - if awk -v round="$round" -v reviewer="$reviewer" ' - /^### Round / { current_round = $3 } - current_round == round && $1 == reviewer":" { found = 1; exit } - END { exit !found } - ' "$tracker_file" 2>/dev/null; then - has_log_entry=true - fi - - if [[ "$has_summary_row" == "true" && "$has_log_entry" == "true" ]]; then - echo "Goal tracker: Round $round/$reviewer already has both Issue Summary and Issue Log entries, skipping update" >&2 - return 0 - fi - - # Track what we need to add (for partial updates) - local need_summary_row=true - local need_log_entry=true - [[ "$has_summary_row" == "true" ]] && need_summary_row=false - [[ "$has_log_entry" == "true" ]] && need_log_entry=false - - if [[ "$has_summary_row" == "true" || "$has_log_entry" == "true" ]]; then - echo "Goal tracker: Round $round/$reviewer has partial update (summary=$has_summary_row, log=$has_log_entry), completing..." 
>&2 - fi - - # Extract current totals - local current_found - current_found=$(grep -E "^- Total Issues Found:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - current_found=${current_found:-0} - - local current_resolved - current_resolved=$(grep -E "^- Total Issues Resolved:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - current_resolved=${current_resolved:-0} - - # Parse bot results if provided (reviewer already extracted above for idempotency check) - local new_issues=0 - local new_resolved=0 - - if [[ -n "$bot_results" && "$bot_results" != "null" ]]; then - new_issues=$(echo "$bot_results" | jq -r '.issues // 0' 2>/dev/null || echo "0") - new_resolved=$(echo "$bot_results" | jq -r '.resolved // 0' 2>/dev/null || echo "0") - fi - - # Calculate new totals - local total_found=$((current_found + new_issues)) - local total_resolved=$((current_resolved + new_resolved)) - local remaining=$((total_found - total_resolved)) - - # Determine status for this round - local status="In Progress" - if [[ $new_issues -eq 0 && $new_resolved -eq 0 ]]; then - status="Approved" - elif [[ $new_issues -gt 0 ]]; then - status="Issues Found" - elif [[ $new_resolved -gt 0 ]]; then - status="Resolved" - fi - - # Create temp file for updates - local temp_file="${tracker_file}.update.$$" - - # Step 1: Update Total Statistics (only if we're adding to totals) - # Only update totals if we're adding a new summary row (to avoid double-counting) - if [[ "$need_summary_row" == "true" ]]; then - sed -e "s/^- Total Issues Found:.*/- Total Issues Found: $total_found/" \ - -e "s/^- Total Issues Resolved:.*/- Total Issues Resolved: $total_resolved/" \ - -e "s/^- Remaining:.*/- Remaining: $remaining/" \ - "$tracker_file" > "$temp_file" - else - cp "$tracker_file" "$temp_file" - fi - - # Step 2: Add row to Issue Summary table (only if needed) - if [[ "$need_summary_row" == "true" ]]; then - # Insert row INSIDE the table (after last table row, before blank line) - local new_row="| $round | $reviewer | 
$new_issues | $new_resolved | $status |" - - # Use awk to find the last row of the Issue Summary table and insert after it - awk -v row="$new_row" ' - BEGIN { in_table = 0; last_row_printed = 0 } - /^## Issue Summary/ { in_table = 1 } - /^## Total Statistics/ { in_table = 0 } - { - # If we hit Total Statistics and havent printed the new row yet, print it first - if (/^## Total Statistics/ && !last_row_printed) { - print row - print "" - last_row_printed = 1 - } - # If in table and this is a table row (starts with |), store it - if (in_table && /^\|/) { - last_table_line = NR - } - # If in table and this is a blank line after table rows, insert new row - if (in_table && /^[[:space:]]*$/ && last_table_line > 0 && !last_row_printed) { - print row - last_row_printed = 1 - } - print - } - ' "$temp_file" > "${temp_file}.2" - mv "${temp_file}.2" "$temp_file" - fi - - # Step 3: Add Issue Log entry for this round (only if needed) - if [[ "$need_log_entry" == "true" ]]; then - local timestamp - timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - local log_entry="### Round $round -$reviewer: Found $new_issues issues, Resolved $new_resolved -Updated: $timestamp -" - # Append to Issue Log section - echo "" >> "$temp_file" - echo "$log_entry" >> "$temp_file" - fi - - mv "$temp_file" "$tracker_file" - echo "Goal tracker updated: Round $round, Reviewer=$reviewer, Found=$new_issues, Resolved=$new_resolved" >&2 - return 0 -} diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7ad32657..13d29f6e 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -221,7 +221,7 @@ validate_template_dir() { return 1 fi - local required_subdirs=("block" "codex" "claude" "plan" "pr-loop") + local required_subdirs=("block" "codex" "claude" "plan") local missing=() local subdir for subdir in "${required_subdirs[@]}"; do diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index d3cf6434..547210fc 100755 --- a/hooks/loop-bash-validator.sh 
+++ b/hooks/loop-bash-validator.sh @@ -1,14 +1,12 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Bash commands for RLCR loop and PR loop +# PreToolUse Hook: Validate Bash commands for RLCR loop # # Blocks attempts to bypass Write/Edit hooks using shell commands: # - cat/echo/printf > file.md (redirection) # - tee file.md # - sed -i file.md (in-place edit) # - goal-tracker.md modifications via Bash -# - PR loop state.md modifications -# - PR loop read-only file modifications (pr-comment, prompt, codex-prompt, etc.) # set -euo pipefail @@ -60,10 +58,6 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" ACTIVE_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") -# Check for active PR loop -PR_LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" -ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") - # ======================================== # Methodology Analysis Phase Bash Restriction # ======================================== @@ -158,8 +152,8 @@ File redirection is not allowed during the methodology analysis phase." >&2 fi fi -# If no active loop of either type, allow all commands -if [[ -z "$ACTIVE_LOOP_DIR" ]] && [[ -z "$ACTIVE_PR_LOOP_DIR" ]]; then +# If no active RLCR loop, allow all commands +if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 fi @@ -169,7 +163,7 @@ fi # Prevents Claude from manually running stop hook or stop gate scripts. # These scripts should only be invoked by the hooks system, not via Bash. 
-BLOCKED_HOOK_SCRIPTS="(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh|rlcr-stop-gate\.sh)" +BLOCKED_HOOK_SCRIPTS="(loop-codex-stop-hook\.sh|rlcr-stop-gate\.sh)" HOOK_ASSIGNMENT_PREFIX="[[:alpha:]_][[:alnum:]_]*=[^[:space:];&|]+" HOOK_COMMAND_PREFIX="command([[:space:]]+(-[^[:space:];&|]+|--))*" HOOK_ENV_PREFIX="env([[:space:]]+(-[^[:space:];&|]+|--|${HOOK_ASSIGNMENT_PREFIX}))*" @@ -558,52 +552,4 @@ fi fi # End of RLCR-specific checks -# ======================================== -# PR Loop File Protection -# ======================================== -# Block modifications to PR loop state and read-only files -# Note: ACTIVE_PR_LOOP_DIR was already set at the top of the script - -if [[ -n "$ACTIVE_PR_LOOP_DIR" ]]; then - # Block PR loop state.md modifications - # Check both full path pattern AND bare filename to catch relative path bypass - # (e.g., cd .humanize/pr-loop/timestamp && sed -i state.md) - if command_modifies_file "$COMMAND_LOWER" "\.humanize/pr-loop(/[^/]+)?/state\.md"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - # Bare filename check for state.md (catches relative path usage) - if command_modifies_file "$COMMAND_LOWER" "state\.md"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block PR loop read-only files: - # - round-N-pr-comment.md (fetched comments) - # - round-N-prompt.md (prompts from system) - # - round-N-codex-prompt.md (Codex prompts) - # - round-N-pr-check.md (Codex output) - # - round-N-pr-feedback.md (feedback for next round) - PR_LOOP_READONLY_PATTERNS=( - "round-[0-9]+-pr-comment\.md" - "round-[0-9]+-prompt\.md" - "round-[0-9]+-codex-prompt\.md" - "round-[0-9]+-pr-check\.md" - "round-[0-9]+-pr-feedback\.md" - ) - - for pattern in "${PR_LOOP_READONLY_PATTERNS[@]}"; do - # Check both full path pattern AND bare filename to catch relative path bypass - if command_modifies_file "$COMMAND_LOWER" "\.humanize/pr-loop(/[^/]+)?/${pattern}"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - # Bare filename check 
(catches relative path usage from within loop dir) - if command_modifies_file "$COMMAND_LOWER" "${pattern}"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - done -fi - exit 0 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 32ba3a9d..17fae65c 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Edit paths for RLCR loop and PR loop +# PreToolUse Hook: Validate Edit paths for RLCR loop # # Blocks Claude from editing: # - Todos files (should use native Task tools instead) @@ -8,8 +8,6 @@ # - State files (managed by hooks, not Claude) # - Wrong round number contract files # - Goal tracker edits outside the active loop or that alter the immutable section -# - PR loop state files (.humanize/pr-loop/) -# - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # set -euo pipefail @@ -54,32 +52,6 @@ if is_round_file_type "$FILE_PATH_LOWER" "prompt"; then exit 2 fi -# ======================================== -# PR Loop File Protection -# ======================================== - -IN_PR_LOOP_DIR=$(is_in_pr_loop_dir "$FILE_PATH" && echo "true" || echo "false") - -if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then - # Block state.md edits in PR loop - if is_state_file_path "$FILE_PATH_LOWER"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block read-only PR loop files - if is_pr_loop_readonly_file "$FILE_PATH_LOWER"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - - # For round-N-pr-resolve.md (Claude's resolution summary), validate round number - if is_pr_round_file_type "$FILE_PATH_LOWER" "pr-resolve"; then - validate_pr_resolve_round "$FILE_PATH_LOWER" "edit" || exit $? 
- exit 0 - fi -fi - # ======================================== # Methodology Analysis Phase Edit Restriction # ======================================== diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 3188c1a4..cbd9aa1e 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Read access for RLCR loop and PR loop files +# PreToolUse Hook: Validate Read access for RLCR loop files # # Blocks Claude from reading: # - Wrong round's prompt/summary/contract files (outdated information) @@ -9,9 +9,6 @@ # - Todos files (should use native Task tools instead) # - goal-tracker.md from old RLCR sessions # -# PR loop files (.humanize/pr-loop/) are generally allowed to read -# to give Claude access to comments, prompts, and feedback. -# set -euo pipefail diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index b7314ed9..a8747e9d 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Write paths for RLCR loop and PR loop +# PreToolUse Hook: Validate Write paths for RLCR loop # # Blocks Claude from writing to: # - Todos files (should use native Task tools instead) @@ -9,8 +9,6 @@ # - Wrong round number contract files # - Summary files outside .humanize/rlcr/ # - Goal tracker writes outside the active loop or that alter the immutable section -# - PR loop state files (.humanize/pr-loop/) -# - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # set -euo pipefail @@ -71,32 +69,6 @@ if is_round_file_type "$FILE_PATH_LOWER" "prompt"; then exit 2 fi -# ======================================== -# PR Loop File Protection -# ======================================== - -IN_PR_LOOP_DIR=$(is_in_pr_loop_dir "$FILE_PATH" && echo "true" || echo "false") - -if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then - # Block state.md writes in PR loop - 
if is_state_file_path "$FILE_PATH_LOWER"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block read-only PR loop files - if is_pr_loop_readonly_file "$FILE_PATH_LOWER"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - - # For round-N-pr-resolve.md (Claude's resolution summary), validate round number - if is_pr_round_file_type "$FILE_PATH_LOWER" "pr-resolve"; then - validate_pr_resolve_round "$FILE_PATH_LOWER" "write to" || exit $? - exit 0 - fi -fi - # ======================================== # Methodology Analysis Phase Write Restriction # ======================================== diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh deleted file mode 100755 index c87abee3..00000000 --- a/hooks/pr-loop-stop-hook.sh +++ /dev/null @@ -1,1654 +0,0 @@ -#!/usr/bin/env bash -# -# Stop Hook for PR loop -# -# Intercepts Claude's exit attempts, polls for remote bot reviews, -# and uses local Codex to validate if bot concerns are addressed. -# -# Key features: -# - Polls until ALL active bots respond (per-bot tracking with 15min timeout each) -# - Checks PR state before polling (detects CLOSED/MERGED) -# - Uses APPROVE marker for Codex approval -# - Updates active_bots list based on per-bot approval -# -# State directory: .humanize/pr-loop// -# State file: state.md (current_round, pr_number, active_bots as YAML list, etc.) -# Resolve file: round-N-pr-resolve.md (Claude's resolution summary) -# Comment file: round-N-pr-comment.md (Fetched PR comments) -# Check file: round-N-pr-check.md (Local Codex validation) -# Feedback file: round-N-pr-feedback.md (Feedback for next round) -# - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Override effort before sourcing loop-common.sh (PR loop defaults to medium effort). -# codex_model is NOT pre-set here so that config-backed values from loop-common.sh apply. 
-DEFAULT_CODEX_EFFORT="medium" -DEFAULT_CODEX_TIMEOUT=900 -DEFAULT_POLL_INTERVAL=30 -DEFAULT_POLL_TIMEOUT=900 # 15 minutes per bot - -# Note: Bot name mapping functions (map_bot_to_author, map_author_to_bot) -# and helper functions (build_yaml_list, build_bot_mention_string) are -# provided by loop-common.sh which is sourced below. - -# ======================================== -# Read Hook Input -# ======================================== - -HOOK_INPUT=$(cat) - -# ======================================== -# Find Active Loop -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Source shared loop functions -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/lib/loop-common.sh" - -# Source portable timeout wrapper -PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -TEMPLATE_DIR="$PLUGIN_ROOT/prompt-template" -source "$PLUGIN_ROOT/scripts/portable-timeout.sh" - -# Default timeout for git/gh operations -GIT_TIMEOUT=30 -GH_TIMEOUT=60 - -# Use shared find_active_pr_loop function from loop-common.sh -LOOP_DIR=$(find_active_pr_loop "$LOOP_BASE_DIR") - -# If no active PR loop, let other hooks handle -if [[ -z "$LOOP_DIR" ]]; then - exit 0 -fi - -STATE_FILE="$LOOP_DIR/state.md" - -if [[ ! 
-f "$STATE_FILE" ]]; then - exit 0 -fi - -# ======================================== -# Parse State File (YAML list format for active_bots) -# ======================================== - -# Declare arrays outside function for macOS Bash 3.2 compatibility -# (declare -g requires Bash 4.2+, which macOS doesn't have by default) -PR_CONFIGURED_BOTS_ARRAY=() -PR_ACTIVE_BOTS_ARRAY=() - -parse_pr_loop_state() { - local state_file="$1" - - STATE_FRONTMATTER=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$state_file" 2>/dev/null || echo "") - - PR_CURRENT_ROUND=$(echo "$STATE_FRONTMATTER" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ' || true) - PR_MAX_ITERATIONS=$(echo "$STATE_FRONTMATTER" | grep "^max_iterations:" | sed "s/max_iterations: *//" | tr -d ' ' || true) - PR_NUMBER=$(echo "$STATE_FRONTMATTER" | grep "^pr_number:" | sed "s/pr_number: *//" | tr -d ' ' || true) - PR_START_BRANCH=$(echo "$STATE_FRONTMATTER" | grep "^start_branch:" | sed "s/start_branch: *//; s/^\"//; s/\"\$//" || true) - PR_CODEX_MODEL=$(echo "$STATE_FRONTMATTER" | grep "^codex_model:" | sed "s/codex_model: *//" | tr -d ' ' || true) - PR_CODEX_EFFORT=$(echo "$STATE_FRONTMATTER" | grep "^codex_effort:" | sed "s/codex_effort: *//" | tr -d ' ' || true) - PR_CODEX_TIMEOUT=$(echo "$STATE_FRONTMATTER" | grep "^codex_timeout:" | sed "s/codex_timeout: *//" | tr -d ' ' || true) - PR_POLL_INTERVAL=$(echo "$STATE_FRONTMATTER" | grep "^poll_interval:" | sed "s/poll_interval: *//" | tr -d ' ' || true) - PR_POLL_TIMEOUT=$(echo "$STATE_FRONTMATTER" | grep "^poll_timeout:" | sed "s/poll_timeout: *//" | tr -d ' ' || true) - PR_STARTED_AT=$(echo "$STATE_FRONTMATTER" | grep "^started_at:" | sed "s/started_at: *//" || true) - PR_LAST_TRIGGER_AT=$(echo "$STATE_FRONTMATTER" | grep "^last_trigger_at:" | sed "s/last_trigger_at: *//" || true) - - # New state fields for Cases 1-5 and force push detection - PR_STARTUP_CASE=$(echo "$STATE_FRONTMATTER" | grep "^startup_case:" | sed "s/startup_case: *//" | tr -d 
' ' || true) - PR_LATEST_COMMIT_SHA=$(echo "$STATE_FRONTMATTER" | grep "^latest_commit_sha:" | sed "s/latest_commit_sha: *//" | tr -d ' ' || true) - PR_LATEST_COMMIT_AT=$(echo "$STATE_FRONTMATTER" | grep "^latest_commit_at:" | sed "s/latest_commit_at: *//" || true) - PR_TRIGGER_COMMENT_ID=$(echo "$STATE_FRONTMATTER" | grep "^trigger_comment_id:" | sed "s/trigger_comment_id: *//" | tr -d ' ' || true) - - # Parse configured_bots and active_bots as YAML lists - # configured_bots: never changes, used for polling all bots (allows re-add) - # active_bots: current bots with issues, shrinks as bots approve - # Arrays are declared outside function for macOS Bash 3.2 compatibility - PR_CONFIGURED_BOTS_ARRAY=() - PR_ACTIVE_BOTS_ARRAY=() - - # Parse YAML list helper function - # NOTE: Avoids 'local -n' (nameref) which requires Bash 4.3+ and fails on macOS Bash 3.2 - # Instead, outputs values to stdout and caller captures into array - parse_yaml_list() { - local field_name="$1" - local in_field=false - - while IFS= read -r line; do - if [[ "$line" =~ ^${field_name}: ]]; then - in_field=true - # Check if it's inline format: field: value - local inline_value="${line#*: }" - if [[ -n "$inline_value" && "$inline_value" != "${field_name}:" ]]; then - # Old comma-separated format for backwards compatibility - echo "$inline_value" | tr ',' '\n' | tr -d ' ' - in_field=false - fi - continue - fi - if [[ "$in_field" == "true" ]]; then - if [[ "$line" =~ ^[[:space:]]+-[[:space:]]+ ]]; then - # Extract bot name from " - botname" - local bot_name="${line#*- }" - bot_name=$(echo "$bot_name" | tr -d ' ') - if [[ -n "$bot_name" ]]; then - echo "$bot_name" - fi - elif [[ "$line" =~ ^[a-zA-Z_] ]]; then - # New field started, stop parsing - in_field=false - fi - fi - done <<< "$STATE_FRONTMATTER" - } - - # Read parsed values into arrays (macOS Bash 3.2 compatible) - while IFS= read -r bot; do - [[ -n "$bot" ]] && PR_CONFIGURED_BOTS_ARRAY+=("$bot") - done < <(parse_yaml_list "configured_bots") - - 
while IFS= read -r bot; do - [[ -n "$bot" ]] && PR_ACTIVE_BOTS_ARRAY+=("$bot") - done < <(parse_yaml_list "active_bots") - - # Backwards compatibility: if configured_bots is empty, use active_bots - if [[ ${#PR_CONFIGURED_BOTS_ARRAY[@]} -eq 0 ]]; then - PR_CONFIGURED_BOTS_ARRAY=("${PR_ACTIVE_BOTS_ARRAY[@]}") - fi - - # Apply defaults - PR_CURRENT_ROUND="${PR_CURRENT_ROUND:-0}" - PR_MAX_ITERATIONS="${PR_MAX_ITERATIONS:-42}" - PR_CODEX_MODEL="${PR_CODEX_MODEL:-$DEFAULT_CODEX_MODEL}" - PR_CODEX_EFFORT="${PR_CODEX_EFFORT:-$DEFAULT_CODEX_EFFORT}" - PR_CODEX_TIMEOUT="${PR_CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}" - PR_POLL_INTERVAL="${PR_POLL_INTERVAL:-$DEFAULT_POLL_INTERVAL}" - PR_POLL_TIMEOUT="${PR_POLL_TIMEOUT:-$DEFAULT_POLL_TIMEOUT}" -} - -parse_pr_loop_state "$STATE_FILE" - -# Build display string and mention string from active bots array -PR_ACTIVE_BOTS_DISPLAY=$(IFS=', '; echo "${PR_ACTIVE_BOTS_ARRAY[*]}") -PR_CONFIGURED_BOTS_DISPLAY=$(IFS=', '; echo "${PR_CONFIGURED_BOTS_ARRAY[*]}") - -# Build mention string from configured bots (for detecting trigger comments) -PR_BOT_MENTION_STRING=$(build_bot_mention_string "${PR_CONFIGURED_BOTS_ARRAY[@]}") - -# Validate required fields -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number not found in state file" >&2 - exit 0 -fi - -if [[ ! "$PR_CURRENT_ROUND" =~ ^[0-9]+$ ]]; then - echo "Warning: Invalid current_round in state file" >&2 - exit 0 -fi - -# ======================================== -# Resolve PR Base Repository (for fork PRs) -# ======================================== -# IMPORTANT: For fork PRs, comments are on the base repository, not the fork. -# gh pr view without --repo fails in forks because the PR number doesn't exist there. -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks. -# NOTE: This MUST be done BEFORE PR state checks, which also need --repo for forks. 
- -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -PR_LOOKUP_REPO="" # Repo where PR was found (for subsequent lookups) - -if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - PR_LOOKUP_REPO="$CURRENT_REPO" - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - PR_LOOKUP_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Warning: Could not resolve PR base repository, using current repo" >&2 - PR_BASE_REPO="$CURRENT_REPO" - PR_LOOKUP_REPO="$CURRENT_REPO" -fi - -# ======================================== -# Check PR State (detect CLOSED/MERGED before polling) -# ======================================== -# NOTE: Uses PR_LOOKUP_REPO (resolved above) for fork PR support - -PR_STATE=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json state -q .state 2>/dev/null) || PR_STATE="" - -if [[ "$PR_STATE" == "MERGED" ]]; then - echo "PR #$PR_NUMBER has been merged. Marking loop as complete." 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/merged-state.md" - exit 0 -fi - -if [[ "$PR_STATE" == "CLOSED" ]]; then - echo "PR #$PR_NUMBER has been closed. Marking loop as closed." >&2 - mv "$STATE_FILE" "$LOOP_DIR/closed-state.md" - exit 0 -fi - -# ======================================== -# Check Resolution File Exists -# ======================================== - -RESOLVE_FILE="$LOOP_DIR/round-${PR_CURRENT_ROUND}-pr-resolve.md" - -if [[ ! -f "$RESOLVE_FILE" ]]; then - REASON="# Resolution Summary Missing - -Please write your resolution summary to: $RESOLVE_FILE - -The summary should include: -- Issues addressed -- Files modified -- Tests added (if any)" - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Resolution summary missing for round $PR_CURRENT_ROUND" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Check Git Status -# ======================================== - -if command -v git &>/dev/null && run_with_timeout "$GIT_TIMEOUT" git rev-parse --git-dir &>/dev/null 2>&1; then - GIT_STATUS_CACHED=$(run_with_timeout "$GIT_TIMEOUT" git status --porcelain 2>/dev/null) || GIT_EXIT=$? - GIT_EXIT=${GIT_EXIT:-0} - - if [[ $GIT_EXIT -ne 0 ]]; then - REASON="# Git Status Failed - -Git status operation failed. Please check your repository state and try again." - jq -n --arg reason "$REASON" --arg msg "PR Loop: Git status failed" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - - # Filter out .humanize from status check - NON_HUMANIZE_STATUS=$(echo "$GIT_STATUS_CACHED" | grep -v '\.humanize' || true) - - if [[ -n "$NON_HUMANIZE_STATUS" ]]; then - REASON="# Git Not Clean - -You have uncommitted changes. Please commit all changes before exiting. 
- -Changes detected: -\`\`\` -$NON_HUMANIZE_STATUS -\`\`\`" - jq -n --arg reason "$REASON" --arg msg "PR Loop: Uncommitted changes detected" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - - # Step 6: Check for unpushed commits (PR loop always requires push) - CURRENT_BRANCH=$(git branch --show-current 2>/dev/null || echo "main") - AHEAD_COUNT=0 - LOCAL_HEAD=$(git rev-parse HEAD 2>/dev/null) || LOCAL_HEAD="" - - # First try: git status -sb works when upstream is configured - GIT_AHEAD=$(run_with_timeout "$GIT_TIMEOUT" git status -sb 2>/dev/null | grep -o 'ahead [0-9]*' || true) - if [[ -n "$GIT_AHEAD" ]]; then - AHEAD_COUNT=$(echo "$GIT_AHEAD" | grep -o '[0-9]*') - else - # Fallback: Check if upstream exists, if not compare with origin/branch or PR head - if ! git rev-parse --abbrev-ref '@{u}' >/dev/null 2>&1; then - # No upstream configured - try origin/branch first - REMOTE_HEAD=$(git rev-parse "origin/$CURRENT_BRANCH" 2>/dev/null) || REMOTE_HEAD="" - if [[ -n "$LOCAL_HEAD" && -n "$REMOTE_HEAD" && "$LOCAL_HEAD" != "$REMOTE_HEAD" ]]; then - # Count commits ahead of remote - AHEAD_COUNT=$(git rev-list --count "origin/$CURRENT_BRANCH..HEAD" 2>/dev/null) || AHEAD_COUNT=0 - elif [[ -z "$REMOTE_HEAD" && -n "$PR_NUMBER" ]]; then - # No origin/branch exists - compare with PR's headRefOid from GitHub - # This handles cases where branch was never pushed or remote ref is missing - # NOTE: Use --repo for fork PR support (PR_BASE_REPO resolved earlier) - PR_HEAD_SHA=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_BASE_REPO" --json headRefOid -q '.headRefOid' 2>/dev/null) || PR_HEAD_SHA="" - if [[ -z "$PR_HEAD_SHA" ]]; then - # Failed to get PR head - fail closed (assume unpushed) for safety - echo "Warning: Could not fetch PR head SHA, assuming unpushed commits" >&2 - AHEAD_COUNT=1 - elif [[ -n "$LOCAL_HEAD" && "$LOCAL_HEAD" != "$PR_HEAD_SHA" ]]; then - # Local differs from PR head - count commits since PR head - 
AHEAD_COUNT=$(git rev-list --count "$PR_HEAD_SHA..HEAD" 2>/dev/null) || { - # PR head not in local history (force push?) - treat as 1 unpushed - AHEAD_COUNT=1 - } - fi - fi - fi - fi - - if [[ "$AHEAD_COUNT" -gt 0 ]]; then - FALLBACK_MSG="# Unpushed Commits Detected - -You have $AHEAD_COUNT unpushed commit(s). PR loop requires pushing changes so bots can review them. - -Please push: git push origin $CURRENT_BRANCH" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/unpushed-commits.md" "$FALLBACK_MSG" \ - "AHEAD_COUNT=$AHEAD_COUNT" "CURRENT_BRANCH=$CURRENT_BRANCH") - jq -n --arg reason "$REASON" --arg msg "PR Loop: $AHEAD_COUNT unpushed commit(s)" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi -fi - -# ======================================== -# Force Push Detection -# ======================================== - -# Detect if the remote branch HEAD has changed in a way that indicates force push -# This happens when previous commits are no longer reachable from current HEAD -if [[ -n "$PR_LATEST_COMMIT_SHA" ]]; then - CURRENT_HEAD=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD 2>/dev/null) || CURRENT_HEAD="" - - # Check if the stored commit SHA is still reachable from current HEAD - # If not, a force push (history rewrite) has occurred - if [[ -n "$CURRENT_HEAD" && "$CURRENT_HEAD" != "$PR_LATEST_COMMIT_SHA" ]]; then - # Check if old commit is ancestor of current HEAD - IS_ANCESTOR=$(run_with_timeout "$GIT_TIMEOUT" git merge-base --is-ancestor "$PR_LATEST_COMMIT_SHA" "$CURRENT_HEAD" 2>/dev/null && echo "yes" || echo "no") - - if [[ "$IS_ANCESTOR" == "no" ]]; then - echo "Force push detected: $PR_LATEST_COMMIT_SHA is no longer reachable from $CURRENT_HEAD" >&2 - - # Preserve OLD commit SHA before updating state - OLD_COMMIT_SHA="$PR_LATEST_COMMIT_SHA" - - # Get the timestamp of the new HEAD commit for trigger validation - # This ensures detect_trigger_comment only accepts comments AFTER the force push - # NOTE: Uses 
PR_LOOKUP_REPO for fork PR support - NEW_HEAD_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || NEW_HEAD_COMMIT_AT="" - - if [[ -z "$NEW_HEAD_COMMIT_AT" ]]; then - # Fallback: use current timestamp - NEW_HEAD_COMMIT_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - fi - - # Update state file with new commit SHA/timestamp and clear trigger state - # Clear BOTH last_trigger_at AND trigger_comment_id to prevent stale eyes checks - TEMP_FILE="${STATE_FILE}.forcepush.$$" - sed -e "s/^latest_commit_sha:.*/latest_commit_sha: $CURRENT_HEAD/" \ - -e "s/^latest_commit_at:.*/latest_commit_at: $NEW_HEAD_COMMIT_AT/" \ - -e "s/^last_trigger_at:.*/last_trigger_at:/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id:/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - # Update local variables to reflect the change - PR_LATEST_COMMIT_SHA="$CURRENT_HEAD" - PR_LATEST_COMMIT_AT="$NEW_HEAD_COMMIT_AT" - PR_LAST_TRIGGER_AT="" - PR_TRIGGER_COMMENT_ID="" - - FALLBACK_MSG="# Force Push Detected - -A force push (history rewrite) has been detected. Post a new @bot trigger comment: $PR_BOT_MENTION_STRING" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/force-push-detected.md" "$FALLBACK_MSG" \ - "OLD_COMMIT=$OLD_COMMIT_SHA" "NEW_COMMIT=$CURRENT_HEAD" "BOT_MENTION_STRING=$PR_BOT_MENTION_STRING" \ - "PR_NUMBER=$PR_NUMBER") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Force push detected - please re-trigger bots" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - fi -fi - -# ======================================== -# Check Max Iterations -# ======================================== - -NEXT_ROUND=$((PR_CURRENT_ROUND + 1)) - -if [[ $NEXT_ROUND -gt $PR_MAX_ITERATIONS ]]; then - echo "PR loop reached max iterations ($PR_MAX_ITERATIONS). Exiting." 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/maxiter-state.md" - exit 0 -fi - -# ======================================== -# Check if Active Bots Remain -# ======================================== -# NOTE: Step 8 (Codex +1 check) has been moved to after trigger detection -# to ensure it uses the correct timestamp that accounts for new commits. - -if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots have approved. PR loop complete!" >&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# ======================================== -# Detect Trigger Comment and Update last_trigger_at -# ======================================== - -# Get current GitHub user login for trigger comment filtering -get_current_user() { - run_with_timeout "$GH_TIMEOUT" gh api user --jq '.login' 2>/dev/null || echo "" -} - -# Find the most recent PR comment from CURRENT USER that contains bot mentions -# Returns: "timestamp|comment_id" on success -# This timestamp is used for --after filtering to catch fast bot replies -# NOTE: Uses --paginate to handle PRs with >30 comments -# IMPORTANT: If latest_commit_at is set, only accepts comments AFTER that timestamp -# This prevents old triggers from being re-used after force push -# IMPORTANT: Uses PR_BASE_REPO (not {owner}/{repo}) for fork PR support -detect_trigger_comment() { - local pr_num="$1" - local current_user="$2" - local after_timestamp="${3:-}" # Optional: only accept comments after this timestamp - - # Fetch ALL issue comments on the PR (paginated to handle >30 comments) - # Using --paginate ensures we don't miss the latest @mention on large PRs - # IMPORTANT: --jq with --paginate runs per-page, so we output objects (not array) - # and use jq -s to aggregate all pages into a single array before filtering - # IMPORTANT: Use PR_BASE_REPO for fork PRs - comments are on base repo, not fork - local comments_json - comments_json=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$pr_num/comments" \ - --paginate 
--jq '.[] | {id: .id, author: .user.login, created_at: .created_at, body: .body}' 2>/dev/null \ - | jq -s '.') || return 1 - - if [[ -z "$comments_json" || "$comments_json" == "[]" ]]; then - return 1 - fi - - # Build pattern to match any @bot mention - local bot_pattern="" - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ -n "$bot_pattern" ]]; then - bot_pattern="${bot_pattern}|@${bot}" - else - bot_pattern="@${bot}" - fi - done - - # Find most recent trigger comment from CURRENT USER (sorted by created_at descending) - # comments_json is already aggregated from all pages into a single array - # If after_timestamp is set, only accept comments created after that timestamp - # Returns both timestamp and comment ID - local trigger_info - if [[ -n "$after_timestamp" ]]; then - # Filter to only comments AFTER the specified timestamp (force push protection) - trigger_info=$(echo "$comments_json" | jq -r \ - --arg pattern "$bot_pattern" \ - --arg user "$current_user" \ - --arg after "$after_timestamp" ' - [.[] | select( - .author == $user and - (.body | test($pattern; "i")) and - (.created_at >= $after) - )] | - sort_by(.created_at) | reverse | .[0] | "\(.created_at)|\(.id)" // empty - ') - else - trigger_info=$(echo "$comments_json" | jq -r --arg pattern "$bot_pattern" --arg user "$current_user" ' - [.[] | select(.author == $user and (.body | test($pattern; "i")))] | - sort_by(.created_at) | reverse | .[0] | "\(.created_at)|\(.id)" // empty - ') - fi - - if [[ -n "$trigger_info" && "$trigger_info" != "null|null" && "$trigger_info" != "|" ]]; then - echo "$trigger_info" - return 0 - fi - - return 1 -} - -# Get current user for trigger comment filtering -CURRENT_USER=$(get_current_user) -if [[ -z "$CURRENT_USER" ]]; then - echo "Warning: Could not determine current GitHub user" >&2 -fi - -# ======================================== -# Refresh latest_commit_at from PR Before Trigger Detection -# ======================================== -# Ensure trigger validation 
uses the CURRENT latest commit timestamp, -# not a stale value from state. This prevents old triggers from being accepted -# after new (non-force) commits are pushed. - -# NOTE: Uses PR_LOOKUP_REPO for fork PR support -CURRENT_LATEST_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || CURRENT_LATEST_COMMIT_AT="" - -# Track if new commits were detected (used to override REQUIRE_TRIGGER for cases 2/3) -NEW_COMMITS_DETECTED=false - -if [[ -n "$CURRENT_LATEST_COMMIT_AT" && "$CURRENT_LATEST_COMMIT_AT" != "$PR_LATEST_COMMIT_AT" ]]; then - echo "Updating latest_commit_at: $PR_LATEST_COMMIT_AT -> $CURRENT_LATEST_COMMIT_AT" >&2 - echo " Clearing stale trigger fields (new commits require new @bot mention)" >&2 - - # Persist to state file and clear trigger fields to prevent stale polling - # New commits mean old trigger is invalid - user must post new @bot comment - TEMP_FILE="${STATE_FILE}.commitrefresh.$$" - sed -e "s/^latest_commit_at:.*/latest_commit_at: $CURRENT_LATEST_COMMIT_AT/" \ - -e "s/^last_trigger_at:.*/last_trigger_at:/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id:/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - PR_LATEST_COMMIT_AT="$CURRENT_LATEST_COMMIT_AT" - PR_LAST_TRIGGER_AT="" - PR_TRIGGER_COMMENT_ID="" - NEW_COMMITS_DETECTED=true -fi - -# ALWAYS check for newer trigger comments and update last_trigger_at -# This ensures we use the most recent trigger, not a stale one -# IMPORTANT: Pass latest_commit_at to filter out old triggers (force push protection) -# After a force push, we need a NEW trigger comment, not one from before the push -echo "Detecting trigger comment timestamp from user '$CURRENT_USER'..." 
>&2 -if [[ -n "$PR_LATEST_COMMIT_AT" ]]; then - echo " (Filtering for comments after: $PR_LATEST_COMMIT_AT)" >&2 -fi -DETECTED_TRIGGER_INFO=$(detect_trigger_comment "$PR_NUMBER" "$CURRENT_USER" "$PR_LATEST_COMMIT_AT") || true -DETECTED_TRIGGER_AT="" -DETECTED_TRIGGER_COMMENT_ID="" - -if [[ -n "$DETECTED_TRIGGER_INFO" ]]; then - # Parse timestamp and comment ID from "timestamp|id" format - DETECTED_TRIGGER_AT="${DETECTED_TRIGGER_INFO%%|*}" - DETECTED_TRIGGER_COMMENT_ID="${DETECTED_TRIGGER_INFO##*|}" -fi - -if [[ -n "$DETECTED_TRIGGER_AT" ]]; then - # Check if detected trigger is newer than stored one - if [[ -z "$PR_LAST_TRIGGER_AT" ]] || [[ "$DETECTED_TRIGGER_AT" > "$PR_LAST_TRIGGER_AT" ]]; then - echo "Found trigger comment at: $DETECTED_TRIGGER_AT (ID: $DETECTED_TRIGGER_COMMENT_ID)" >&2 - if [[ -n "$PR_LAST_TRIGGER_AT" ]]; then - echo " (Updating from older trigger: $PR_LAST_TRIGGER_AT)" >&2 - fi - PR_LAST_TRIGGER_AT="$DETECTED_TRIGGER_AT" - PR_TRIGGER_COMMENT_ID="$DETECTED_TRIGGER_COMMENT_ID" - - # Persist to state file - TEMP_FILE="${STATE_FILE}.trigger.$$" - sed -e "s/^last_trigger_at:.*/last_trigger_at: $DETECTED_TRIGGER_AT/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id: $DETECTED_TRIGGER_COMMENT_ID/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - # Note: Claude eyes verification is done in the dedicated section below - # (after trigger detection) to ensure it runs on EVERY exit attempt - else - echo "Using existing trigger timestamp: $PR_LAST_TRIGGER_AT" >&2 - fi -fi - -# ======================================== -# Determine if Trigger is Required (needed for Claude eyes check below) -# ======================================== - -# Trigger requirement logic: -# - Round 0, startup_case 1: No trigger required (waiting for initial auto-reviews) -# - Round 0, startup_case 2/3: No trigger required (process existing comments) -# - Round 0, startup_case 4/5: Trigger required (new commits after reviews) -# - Round > 0: Always require 
trigger -# - NEW: If new commits detected during this poll, require trigger (overrides cases 2/3) - -REQUIRE_TRIGGER=false -if [[ "$PR_CURRENT_ROUND" -gt 0 ]]; then - # Subsequent rounds always require a trigger - REQUIRE_TRIGGER=true -elif [[ "$NEW_COMMITS_DETECTED" == "true" ]]; then - # New commits detected during this poll - require fresh trigger - # This overrides cases 2/3 to prevent reusing stale reviews - REQUIRE_TRIGGER=true -elif [[ "$PR_CURRENT_ROUND" -eq 0 ]]; then - case "${PR_STARTUP_CASE:-1}" in - 1|2|3) - # Case 1: No comments yet - wait for initial auto-reviews - # Case 2/3: Comments exist - process them without requiring new trigger - REQUIRE_TRIGGER=false - ;; - 4|5) - # Case 4/5: All commented but new commits pushed - require re-trigger - REQUIRE_TRIGGER=true - ;; - *) - # Unknown case, default to not requiring trigger - REQUIRE_TRIGGER=false - ;; - esac -fi - -# ======================================== -# Step 8: Check for Codex +1 Reaction (After Trigger Detection) -# ======================================== -# IMPORTANT: This check runs AFTER trigger detection to ensure: -# 1. We use the correct timestamp that accounts for new commits -# 2. If trigger is required but missing, we don't approve based on old +1 - -# Check for codex bot in active bots -CODEX_IN_ACTIVE=false -for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "codex" ]]; then - CODEX_IN_ACTIVE=true - break - fi -done - -if [[ "$CODEX_IN_ACTIVE" == "true" ]]; then - # Skip +1 check if trigger is required but not yet posted - # (User needs to post @codex comment first) - if [[ "$REQUIRE_TRIGGER" == "true" && -z "$PR_LAST_TRIGGER_AT" ]]; then - echo "Skipping Codex +1 check: trigger required but not yet posted" >&2 - else - echo "Round $PR_CURRENT_ROUND: Checking for Codex +1 reaction on PR..." 
>&2 - - # Determine the timestamp for filtering +1 reactions - # Use trigger timestamp if available, otherwise fall back to loop start time - CODEX_REACTION_AFTER="${PR_LAST_TRIGGER_AT:-$PR_STARTED_AT}" - echo " (Checking for +1 after: $CODEX_REACTION_AFTER)" >&2 - - # Check for +1 reaction from Codex - CODEX_REACTION=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup "$PR_NUMBER" --after "$CODEX_REACTION_AFTER" 2>/dev/null) || CODEX_REACTION="" - - if [[ -n "$CODEX_REACTION" && "$CODEX_REACTION" != "null" ]]; then - REACTION_AT=$(echo "$CODEX_REACTION" | jq -r '.created_at') - echo "Codex +1 detected at $REACTION_AT - removing codex from active_bots" >&2 - - # Remove only codex from active_bots, keep other bots - declare -a NEW_ACTIVE_BOTS_AFTER_THUMBSUP=() - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" != "codex" ]]; then - NEW_ACTIVE_BOTS_AFTER_THUMBSUP+=("$bot") - fi - done - - # If no other bots remain, loop is complete - if [[ ${#NEW_ACTIVE_BOTS_AFTER_THUMBSUP[@]} -eq 0 ]]; then - echo "Codex was the only active bot - PR loop approved!" 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 - fi - - # Update active_bots in state file and continue with other bots - echo "Continuing with remaining bots: ${NEW_ACTIVE_BOTS_AFTER_THUMBSUP[*]}" >&2 - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_AFTER_THUMBSUP[@]}") - - # Update state file - NEW_ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - - TEMP_FILE="${STATE_FILE}.thumbsup.$$" - # Replace active_bots section in state file - awk -v new_bots="$NEW_ACTIVE_BOTS_YAML" ' - /^active_bots:/ { - print "active_bots:" new_bots - in_bots=1 - next - } - in_bots && /^[[:space:]]+-/ { next } - in_bots && /^[a-zA-Z]/ { in_bots=0 } - { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - fi -fi - -# ======================================== -# Validate Trigger Comment Exists (Based on startup_case and round) -# ======================================== - -# Validate trigger FIRST, before Claude eyes check -# This ensures we don't waste time checking eyes on a stale trigger_comment_id - -if [[ "$REQUIRE_TRIGGER" == "true" && -z "$PR_LAST_TRIGGER_AT" ]]; then - # Determine startup case description for template - STARTUP_CASE_DESC="requires trigger comment" - case "${PR_STARTUP_CASE:-1}" in - 4) STARTUP_CASE_DESC="New commits after all bots reviewed" ;; - 5) STARTUP_CASE_DESC="New commits after partial bot reviews" ;; - *) STARTUP_CASE_DESC="Subsequent round requires trigger" ;; - esac - - FALLBACK_MSG="# Missing Trigger Comment - -No @bot mention found. 
Please run: gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review\"" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/no-trigger-comment.md" "$FALLBACK_MSG" \ - "STARTUP_CASE=${PR_STARTUP_CASE:-1}" "STARTUP_CASE_DESC=$STARTUP_CASE_DESC" \ - "CURRENT_ROUND=$PR_CURRENT_ROUND" "BOT_MENTION_STRING=$PR_BOT_MENTION_STRING") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Missing trigger comment - please @mention bots first" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Claude Eyes Verification (AFTER trigger validation) -# ======================================== - -# Verify Claude eyes ONLY AFTER trigger is confirmed to exist -# This prevents checking eyes on a stale trigger_comment_id -# Conditions: -# 1. Claude is configured AND -# 2. A trigger is actually required (REQUIRE_TRIGGER=true) AND -# 3. A trigger comment ID exists (PR_TRIGGER_COMMENT_ID from confirmed detection above) - -CLAUDE_CONFIGURED=false -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "claude" ]]; then - CLAUDE_CONFIGURED=true - break - fi -done - -if [[ "$CLAUDE_CONFIGURED" == "true" && "$REQUIRE_TRIGGER" == "true" ]]; then - # Use the confirmed trigger comment ID (updated by detect_trigger_comment above) - TRIGGER_ID_TO_CHECK="${PR_TRIGGER_COMMENT_ID:-}" - - if [[ -n "$TRIGGER_ID_TO_CHECK" ]]; then - echo "Verifying Claude eyes reaction on trigger comment (ID: $TRIGGER_ID_TO_CHECK)..." 
>&2 - - # Check for eyes reaction with 3x5s retry - # Pass --pr for fork PR support (reactions are on base repo) - EYES_REACTION=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" claude-eyes "$TRIGGER_ID_TO_CHECK" --pr "$PR_NUMBER" --retry 3 --delay 5 2>/dev/null) || EYES_REACTION="" - - if [[ -z "$EYES_REACTION" || "$EYES_REACTION" == "null" ]]; then - # Claude eyes verification is BLOCKING - error after 3x5s retries - FALLBACK_MSG="# Claude Bot Not Responding - -The Claude bot did not respond with an 'eyes' reaction within 15 seconds (3 x 5s retries). -Please verify the Claude bot is installed and configured for this repository." - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/claude-eyes-timeout.md" "$FALLBACK_MSG" \ - "RETRY_COUNT=3" "TOTAL_WAIT_SECONDS=15") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Claude bot not responding - check bot configuration" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - else - echo "Claude eyes reaction confirmed!" >&2 - fi - else - # Trigger exists (PR_LAST_TRIGGER_AT is set) but no ID - should not happen normally - echo "Warning: Trigger exists but no comment ID for eyes verification" >&2 - fi -elif [[ "$CLAUDE_CONFIGURED" == "true" ]]; then - echo "Claude is configured but trigger not required (startup_case=${PR_STARTUP_CASE:-1}, round=$PR_CURRENT_ROUND) - skipping eyes verification" >&2 -fi - -# ======================================== -# Poll for New Bot Reviews (per-bot tracking) -# ======================================== - -# Poll ALL configured bots, not just active - allows re-adding approved bots if they post new issues -echo "Polling for new bot reviews on PR #$PR_NUMBER..." 
>&2 -echo "Configured bots: $PR_CONFIGURED_BOTS_DISPLAY" >&2 -echo "Active bots: $PR_ACTIVE_BOTS_DISPLAY" >&2 -echo "Poll interval: ${PR_POLL_INTERVAL}s, Timeout: ${PR_POLL_TIMEOUT}s per bot" >&2 - -POLL_SCRIPT="$PLUGIN_ROOT/scripts/poll-pr-reviews.sh" - -# Consistent file naming: round-N files all refer to round N -COMMENT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-comment.md" - -# Get timestamp for filtering based on startup_case and round -# - With trigger: use trigger timestamp (most accurate) -# - Round 0, Case 1: use started_at (waiting for new auto-reviews) -# - Round 0, Case 2/3: use epoch 0 to collect ALL existing comments -# - Round 0, Case 4/5: should have trigger (blocked above if missing) -AFTER_TIMESTAMP="" -USE_ALL_COMMENTS=false - -if [[ -n "$PR_LAST_TRIGGER_AT" ]]; then - # Always use trigger timestamp when available - AFTER_TIMESTAMP="$PR_LAST_TRIGGER_AT" - echo "Round $PR_CURRENT_ROUND: using trigger timestamp for --after: $AFTER_TIMESTAMP" >&2 -elif [[ "$PR_CURRENT_ROUND" -eq 0 ]]; then - case "${PR_STARTUP_CASE:-1}" in - 1) - # Case 1: No comments yet - filter by started_at to wait for new reviews - AFTER_TIMESTAMP="${PR_STARTED_AT}" - echo "Round 0, Case 1: using started_at for --after: $AFTER_TIMESTAMP" >&2 - ;; - 2|3) - # Case 2/3: Existing comments - collect ALL of them (no timestamp filter) - USE_ALL_COMMENTS=true - AFTER_TIMESTAMP="1970-01-01T00:00:00Z" # Epoch 0 to include all comments - echo "Round 0, Case ${PR_STARTUP_CASE}: collecting ALL existing bot comments" >&2 - ;; - *) - # Case 4/5 should have been blocked above, use started_at as fallback - AFTER_TIMESTAMP="${PR_STARTED_AT}" - echo "Round 0, Case ${PR_STARTUP_CASE}: using started_at for --after: $AFTER_TIMESTAMP" >&2 - ;; - esac -else - # Round N>0 with no trigger - this should have been blocked earlier - # but handle defensively by blocking here too - REASON="# Missing Trigger Comment - -No @bot mention comment found from you on this PR. 
- -Before polling for bot reviews, you must comment on the PR to trigger the bots. - -**Please run:** -\`\`\`bash -gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" -\`\`\` - -Then try exiting again." - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Missing trigger comment" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Convert trigger timestamp to epoch for timeout anchoring -# Per-bot timeouts are measured from the TRIGGER time, not poll start time -# Special case: when USE_ALL_COMMENTS is true (startup cases 2/3), we're looking at -# ALL historical comments. In this case, anchor timeout to NOW (poll start time) -# rather than PR_STARTED_AT, which could be hours old and cause instant timeout. -if [[ "$USE_ALL_COMMENTS" == "true" ]]; then - # Use current time as timeout anchor for historical comment review - TRIGGER_EPOCH=$(date +%s) -else - TRIGGER_EPOCH=$(date -d "$AFTER_TIMESTAMP" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$AFTER_TIMESTAMP" +%s 2>/dev/null || date +%s) -fi - -# Track which bots have responded and their individual timeouts -# IMPORTANT: Poll ALL configured bots (not just active) so we can detect when -# previously approved bots post new issues and re-add them to active_bots -# IMPORTANT: Timeouts are anchored to TRIGGER_EPOCH, not poll start time -# This ensures the 15-minute window is measured from when the @mention was posted -# -# NOTE: Using dynamic variable names instead of associative arrays (declare -A) -# for macOS Bash 3.2 compatibility. Associative arrays require Bash 4.0+. 
-# Helper functions to get/set values: -_sanitize_key() { echo "$1" | tr -c 'a-zA-Z0-9_' '_'; } -_map_get() { local var="$1_$(_sanitize_key "$2")"; echo "${!var}"; } -_map_set() { local var="$1_$(_sanitize_key "$2")"; eval "$var=\"$3\""; } -_map_isset() { local var="$1_$(_sanitize_key "$2")"; [[ -n "${!var+x}" ]]; } - -POLL_START_EPOCH=$(date +%s) -echo "Timeout anchor: trigger at epoch $TRIGGER_EPOCH (poll started at $POLL_START_EPOCH)" >&2 -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - _map_set "BOTS_RESPONDED" "$bot" "false" - _map_set "BOTS_TIMED_OUT" "$bot" "false" - # Use TRIGGER_EPOCH for timeout, not poll start - _map_set "BOTS_TIMEOUT_START" "$bot" "$TRIGGER_EPOCH" -done - -# Collect all new comments with deduplication by id -# Using dynamic variables: SEEN_ID_=1 -ALL_NEW_COMMENTS="[]" - -while true; do - CURRENT_TIME=$(date +%s) - - # Check if all configured bots have responded OR timed out (per-bot 15min timeout) - ALL_DONE=true - WAITING_BOTS="" - TIMED_OUT_BOTS="" - - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_RESPONDED "$bot")" == "true" ]]; then - continue # Bot already responded - fi - - # Check per-bot timeout (15 minutes each) - auto-remove after timeout - BOT_ELAPSED=$((CURRENT_TIME - $(_map_get BOTS_TIMEOUT_START "$bot"))) - if [[ $BOT_ELAPSED -ge $PR_POLL_TIMEOUT ]]; then - echo "Bot '$bot' timed out after ${PR_POLL_TIMEOUT}s - will be removed from active_bots" >&2 - _map_set "BOTS_TIMED_OUT" "$bot" "true" # Mark as timed out for later removal - if [[ -n "$TIMED_OUT_BOTS" ]]; then - TIMED_OUT_BOTS="${TIMED_OUT_BOTS}, ${bot}" - else - TIMED_OUT_BOTS="$bot" - fi - continue # Mark as done (timed out) - fi - - # Bot still waiting - ALL_DONE=false - if [[ -n "$WAITING_BOTS" ]]; then - WAITING_BOTS="${WAITING_BOTS},${bot}" - else - WAITING_BOTS="$bot" - fi - done - - if [[ "$ALL_DONE" == "true" ]]; then - if [[ -n "$TIMED_OUT_BOTS" ]]; then - echo "Polling complete. 
Timed out bots: $TIMED_OUT_BOTS" >&2 - else - echo "All configured bots have responded!" >&2 - fi - break - fi - - # Check for cancel signal - if [[ -f "$LOOP_DIR/.cancel-requested" ]]; then - echo "Cancel requested, exiting poll loop..." >&2 - exit 0 - fi - - TOTAL_ELAPSED=$((CURRENT_TIME - POLL_START_EPOCH)) - echo "Poll attempt (elapsed: ${TOTAL_ELAPSED}s, waiting for: $WAITING_BOTS)..." >&2 - - # Poll for new comments from bots we're still waiting for - POLL_RESULT=$("$POLL_SCRIPT" "$PR_NUMBER" --after "$AFTER_TIMESTAMP" --bots "$WAITING_BOTS" 2>/dev/null) || { - echo "Warning: Poll script failed, retrying..." >&2 - sleep "$PR_POLL_INTERVAL" - continue - } - - # Check which bots responded (check all configured bots) - # Poll script returns author names (e.g., chatgpt-codex-connector[bot]) - # We need to map them back to bot names (e.g., codex) - RESPONDED_BOTS=$(echo "$POLL_RESULT" | jq -r '.bots_responded[]' 2>/dev/null || true) - for responded_author in $RESPONDED_BOTS; do - # Map author name to bot name (e.g., chatgpt-codex-connector[bot] -> codex) - responded_bot=$(map_author_to_bot "$responded_author") - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$responded_bot" == "$bot" ]]; then - if [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - _map_set "BOTS_RESPONDED" "$bot" "true" - echo "Bot '$bot' has responded!" 
>&2 - fi - fi - done - done - - # Check for Codex +1 reaction during polling (any round) - # Codex may give +1 instead of commenting if no issues found - if [[ "$(_map_get BOTS_RESPONDED codex)" != "true" ]]; then - # Check if codex is a configured bot - CODEX_CONFIGURED=false - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - [[ "$bot" == "codex" ]] && CODEX_CONFIGURED=true && break - done - - if [[ "$CODEX_CONFIGURED" == "true" ]]; then - # Determine timestamp for filtering - use trigger if available, else loop start - POLL_REACTION_AFTER="${PR_LAST_TRIGGER_AT:-$PR_STARTED_AT}" - - # Check for +1 reaction - THUMBSUP_RESULT=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup "$PR_NUMBER" --after "$POLL_REACTION_AFTER" 2>/dev/null) || THUMBSUP_RESULT="" - - if [[ -n "$THUMBSUP_RESULT" && "$THUMBSUP_RESULT" != "null" ]]; then - # +1 found - codex approved without issues - echo "Codex +1 reaction detected during polling - treating as approval!" >&2 - _map_set "BOTS_RESPONDED" "codex" "true" - - # Remove codex from active_bots - declare -a NEW_ACTIVE_BOTS_THUMBSUP=() - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" != "codex" ]]; then - NEW_ACTIVE_BOTS_THUMBSUP+=("$bot") - else - echo "Removing 'codex' from active_bots (approved via +1)" >&2 - fi - done - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_THUMBSUP[@]}") - - # Update active_bots in state file - if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots have approved (codex via +1) - PR loop complete!" 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 - else - # Update state file with remaining bots - ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - # Use awk to replace active_bots section (portable across GNU/BSD) - TEMP_FILE="${STATE_FILE}.thumbsup.$$" - awk -v bots="$ACTIVE_BOTS_YAML" ' - /^active_bots:$/ { - print "active_bots:" bots - skip = 1 - next - } - skip && /^[a-z_]+:/ { skip = 0 } - skip && /^ - / { next } - !skip { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - fi - fi - fi - - # Collect new comments WITH DEDUPLICATION by comment id - NEW_COMMENTS=$(echo "$POLL_RESULT" | jq -r '.comments' 2>/dev/null || echo "[]") - if [[ "$NEW_COMMENTS" != "[]" && "$NEW_COMMENTS" != "null" ]]; then - # Deduplicate: only add comments we haven't seen before - UNIQUE_COMMENTS="[]" - while IFS= read -r comment_json; do - [[ -z "$comment_json" || "$comment_json" == "null" ]] && continue - COMMENT_ID=$(echo "$comment_json" | jq -r '.id // empty') - if [[ -n "$COMMENT_ID" ]] && ! _map_isset "SEEN_COMMENT_IDS" "$COMMENT_ID"; then - _map_set "SEEN_COMMENT_IDS" "$COMMENT_ID" "1" - UNIQUE_COMMENTS=$(echo "$UNIQUE_COMMENTS" | jq --argjson c "$comment_json" '. + [$c]') - fi - done < <(echo "$NEW_COMMENTS" | jq -c '.[]') - - if [[ "$UNIQUE_COMMENTS" != "[]" ]]; then - ALL_NEW_COMMENTS=$(echo "$ALL_NEW_COMMENTS $UNIQUE_COMMENTS" | jq -s 'add') - fi - fi - - sleep "$PR_POLL_INTERVAL" -done - -# ======================================== -# Handle No Responses (auto-remove timed-out bots) -# ======================================== - -COMMENT_COUNT=$(echo "$ALL_NEW_COMMENTS" | jq 'length' 2>/dev/null || echo "0") - -if [[ "$COMMENT_COUNT" == "0" ]]; then - echo "No new bot reviews received." 
>&2 - - # Always remove timed-out bots from active_bots (per-bot timeout behavior) - # Don't wait for ALL bots to timeout - remove each bot as it times out - TIMED_OUT_COUNT=0 - WAITING_COUNT=0 - declare -a NEW_ACTIVE_BOTS_TIMEOUT=() - - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_TIMED_OUT "$bot")" == "true" ]]; then - TIMED_OUT_COUNT=$((TIMED_OUT_COUNT + 1)) - echo "Removing '$bot' from active_bots (timed out after ${PR_POLL_TIMEOUT}s)" >&2 - # Don't add to NEW_ACTIVE_BOTS_TIMEOUT - elif [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - WAITING_COUNT=$((WAITING_COUNT + 1)) - NEW_ACTIVE_BOTS_TIMEOUT+=("$bot") - else - # Bot responded - keep in active (will be processed if comments come in) - NEW_ACTIVE_BOTS_TIMEOUT+=("$bot") - fi - done - - # If any bots timed out, update the state file with remaining active bots - if [[ $TIMED_OUT_COUNT -gt 0 ]]; then - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_TIMEOUT[@]}") - - # If no bots remain, loop is complete - if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots removed (timed out) - PR loop approved!" 
>&2 - # Build configured_bots YAML - TIMEOUT_CONFIGURED_BOTS_YAML=$(build_yaml_list "${PR_CONFIGURED_BOTS_ARRAY[@]}") - # Write updated state with empty active_bots before moving to approve-state.md - { - echo "---" - echo "current_round: $PR_CURRENT_ROUND" - echo "max_iterations: $PR_MAX_ITERATIONS" - echo "pr_number: $PR_NUMBER" - echo "start_branch: $PR_START_BRANCH" - echo "configured_bots:${TIMEOUT_CONFIGURED_BOTS_YAML}" - echo "active_bots:" - echo "codex_model: $PR_CODEX_MODEL" - echo "codex_effort: $PR_CODEX_EFFORT" - echo "codex_timeout: $PR_CODEX_TIMEOUT" - echo "poll_interval: $PR_POLL_INTERVAL" - echo "poll_timeout: $PR_POLL_TIMEOUT" - echo "started_at: $PR_STARTED_AT" - echo "startup_case: ${PR_STARTUP_CASE:-1}" - echo "latest_commit_sha: ${PR_LATEST_COMMIT_SHA:-}" - echo "latest_commit_at: ${PR_LATEST_COMMIT_AT:-}" - echo "last_trigger_at: ${PR_LAST_TRIGGER_AT:-}" - echo "trigger_comment_id: ${PR_TRIGGER_COMMENT_ID:-}" - echo "---" - } > "$LOOP_DIR/approve-state.md" - rm -f "$STATE_FILE" - exit 0 - fi - - # Persist updated active_bots to state file (some bots timed out, others still waiting) - echo "Updating state file with ${#PR_ACTIVE_BOTS_ARRAY[@]} remaining active bots" >&2 - TIMEOUT_ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - TEMP_FILE="${STATE_FILE}.timeout.$$" - awk -v bots="$TIMEOUT_ACTIVE_BOTS_YAML" ' - /^active_bots:$/ { - print "active_bots:" bots - skip = 1 - next - } - skip && /^[a-z_]+:/ { skip = 0 } - skip && /^ - / { next } - !skip { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - - # Build list of bots that didn't respond (only non-timed-out bots that are still waiting) - MISSING_BOTS="" - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - if [[ -n "$MISSING_BOTS" ]]; then - MISSING_BOTS="${MISSING_BOTS}, ${bot}" - else - MISSING_BOTS="$bot" - fi - fi - done - - REASON="# Bot Review Timeout - -No new reviews received from 
bots after polling. - -**Bots that did not respond:** $MISSING_BOTS - -This might mean: -- The bots haven't been triggered (did you comment on the PR?) -- The bots are slow to respond -- The bots are not enabled on this repository - -**Options:** -1. Comment on the PR to trigger bot reviews: - \`\`\`bash - gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" - \`\`\` -2. Wait and try exiting again -3. Cancel the loop: \`/humanize:cancel-pr-loop\`" - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Bot review timeout" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Save New Comments (correct file naming) -# ======================================== - -# Format comments grouped by bot (use configured bots for completeness) -cat > "$COMMENT_FILE" << EOF -# Bot Reviews (Round $NEXT_ROUND) - -Fetched at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -Configured bots: $PR_CONFIGURED_BOTS_DISPLAY -Currently active: $PR_ACTIVE_BOTS_DISPLAY - ---- - -EOF - -# Group comments by ALL configured bots (not just active) -# This allows Codex to see when previously approved bots post new issues -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - # Map bot name to author name (e.g., codex -> chatgpt-codex-connector[bot]) - author=$(map_bot_to_author "$bot") - BOT_COMMENTS=$(echo "$ALL_NEW_COMMENTS" | jq -r --arg author "$author" ' - [.[] | select(.author == $author)] - ') - BOT_COUNT=$(echo "$BOT_COMMENTS" | jq 'length') - - if [[ "$BOT_COUNT" -gt 0 ]]; then - echo "## Comments from ${author}" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - - echo "$BOT_COMMENTS" | jq -r ' - .[] | - "### Comment\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' 
>> "$COMMENT_FILE" - else - echo "## Comments from ${author}" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - echo "*No new comments from this bot.*" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - echo "---" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - fi -done - -echo "Comments saved to: $COMMENT_FILE" >&2 - -# ======================================== -# Run Local Codex Review of Bot Feedback -# ======================================== - -# Consistent file naming: all round-N files refer to round N -CHECK_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-check.md" -FEEDBACK_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-feedback.md" - -echo "Running local Codex review of bot feedback..." >&2 - -# Build Codex prompt with per-bot analysis -CODEX_PROMPT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-codex-prompt.md" -BOT_REVIEW_CONTENT=$(cat "$COMMENT_FILE") - -# Build list of expected bots for Codex (all configured bots) -EXPECTED_BOTS_LIST="" -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - EXPECTED_BOTS_LIST="${EXPECTED_BOTS_LIST}- ${bot}\n" -done - -# Load goal tracker update template (with fallback) -GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" -GOAL_TRACKER_TEMPLATE_VARS=( - "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" - "NEXT_ROUND=$NEXT_ROUND" -) -GOAL_TRACKER_UPDATE_FALLBACK="## Goal Tracker Update -After analysis, update the goal tracker at $GOAL_TRACKER_FILE with current status." - -GOAL_TRACKER_UPDATE_INSTRUCTIONS=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/codex-goal-tracker-update.md" "$GOAL_TRACKER_UPDATE_FALLBACK" "${GOAL_TRACKER_TEMPLATE_VARS[@]}") - -cat > "$CODEX_PROMPT_FILE" << EOF -# PR Review Validation (Per-Bot Analysis) - -Analyze the following bot reviews and determine approval status FOR EACH BOT. - -## Expected Bots -$(echo -e "$EXPECTED_BOTS_LIST") - -## Bot Reviews -$BOT_REVIEW_CONTENT - -## Your Task - -1. For EACH expected bot, analyze their review (if present) -2. 
Determine if each bot is: - - **APPROVE**: Bot explicitly approves or says "no issues found", "LGTM", "Didn't find any major issues", etc. - - **ISSUES**: Bot identifies specific problems that need fixing - - **NO_RESPONSE**: Bot did not post any new comments - -3. Output your analysis to $CHECK_FILE with this EXACT structure: - -### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| | APPROVE/ISSUES/NO_RESPONSE | | - -### Issues Found (if any) -List ALL specific issues from bots that have ISSUES status. - -### Approved Bots (to remove from active_bots) -List bots that should be removed from active tracking (those with APPROVE status). - -### Final Recommendation -- If ALL bots have APPROVE status: End with "APPROVE" on its own line -- If any bot has ISSUES status: End with "ISSUES_REMAINING" on its own line -- If any bot has NO_RESPONSE status: End with "WAITING_FOR_BOTS" on its own line -- If any bot response indicates usage/rate limits hit (e.g., "usage limits", "rate limit", "quota exceeded"): End with "USAGE_LIMIT_HIT" on its own line - -$GOAL_TRACKER_UPDATE_INSTRUCTIONS -EOF - -# Check if codex is available -if ! command -v codex &>/dev/null; then - REASON="# Codex Not Found - -The 'codex' command is not installed or not in PATH. -PR loop requires Codex CLI to validate bot reviews. - -**To fix:** -1. Install Codex CLI -2. Retry the exit - -Or use \`/humanize:cancel-pr-loop\` to cancel the loop." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex not found" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Run Codex -CODEX_ARGS=("-m" "$PR_CODEX_MODEL") -if [[ -n "$PR_CODEX_EFFORT" ]]; then - CODEX_ARGS+=("-c" "model_reasoning_effort=${PR_CODEX_EFFORT}") -fi - -# Determine automation flag based on environment variable -# Default: Use --full-auto (safe mode with sandbox) -# If HUMANIZE_CODEX_BYPASS_SANDBOX is "true" or "1": Use --dangerously-bypass-approvals-and-sandbox -CODEX_AUTO_FLAG="--full-auto" -if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "1" ]]; then - CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" -fi - -# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -# Probe whether the installed Codex CLI supports --disable; fall back to empty args -# so older builds do not fail with an unknown-argument error. -CODEX_DISABLE_HOOKS_ARGS=() -if codex --help 2>&1 | grep -q -- '--disable'; then - CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) -fi - -CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") - -CODEX_PROMPT_CONTENT=$(cat "$CODEX_PROMPT_FILE") -CODEX_EXIT_CODE=0 - -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_ARGS[@]}" - \ - > "$CHECK_FILE" 2>/dev/null || CODEX_EXIT_CODE=$? - -if [[ $CODEX_EXIT_CODE -ne 0 ]]; then - REASON="# Codex Review Failed - -Codex failed to validate bot reviews (exit code: $CODEX_EXIT_CODE). - -Please retry or cancel the loop." - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex review failed" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -if [[ ! -s "$CHECK_FILE" ]]; then - REASON="# Codex Review Empty - -Codex produced no output when validating bot reviews. - -Please retry or cancel the loop." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex review empty" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Check Codex Result and Update active_bots -# ======================================== - -CHECK_CONTENT=$(cat "$CHECK_FILE") -LAST_LINE=$(echo "$CHECK_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) -LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - -# Use "APPROVE" marker to indicate all bots approved -if [[ "$LAST_LINE_TRIMMED" == "APPROVE" ]]; then - echo "All bots have approved! PR loop complete." >&2 - - # Update goal tracker BEFORE exit (idempotent - won't duplicate if Codex already updated) - if [[ -f "$GOAL_TRACKER_FILE" ]]; then - # For APPROVE, we record 0 new issues - update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$NEXT_ROUND" '{"issues": 0, "resolved": 0, "bot": "All"}' || true - fi - - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# Handle WAITING_FOR_BOTS - block exit but don't advance round -if [[ "$LAST_LINE_TRIMMED" == "WAITING_FOR_BOTS" ]]; then - echo "Some bots haven't responded yet. Blocking exit." >&2 - - REASON="# Waiting for Bot Responses - -Some bots haven't posted their reviews yet. - -**Options:** -1. Wait and try exiting again (bots may still be processing) -2. Comment on the PR to trigger bot reviews: - \`\`\`bash - gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" - \`\`\` -3. Cancel the loop: \`/humanize:cancel-pr-loop\` - -**Note:** The round counter will NOT advance until all expected bots respond." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Waiting for bot responses" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Handle USAGE_LIMIT_HIT - terminate loop gracefully (service limitation, not code issue) -if [[ "$LAST_LINE_TRIMMED" == "USAGE_LIMIT_HIT" ]]; then - echo "Bot usage/rate limits detected. Terminating PR loop." >&2 - - # Move state file to indicate usage limit termination - mv "$STATE_FILE" "$LOOP_DIR/usage-limit-state.md" - - # Let exit proceed without blocking - the loop is over due to external limitation - exit 0 -fi - -# ======================================== -# Update active_bots in state file -# ======================================== - -# Extract approved bots from Codex output and remove them from active_bots -# Look for "### Approved Bots" section -# NOTE: Use awk for more robust extraction that handles: -# - Section at end of file (no following ###) -# - Section immediately followed by ### (empty section) -APPROVED_SECTION=$(awk '/^### Approved Bots/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" || true) - -# Extract bots with issues from Codex output (for re-add logic) -# Look for "### Per-Bot Status" table and find bots with ISSUES status -# NOTE: Use awk for more robust extraction -ISSUES_SECTION=$(awk '/^### Per-Bot Status/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" || true) - -# Build new active_bots array with re-add logic -# IMPORTANT: Process ALL configured bots, not just currently active ones -# This allows re-adding bots that were previously approved but now have new issues -declare -a NEW_ACTIVE_BOTS=() -# NOTE: Using _map_set/get instead of declare -A for macOS Bash 3.2 compatibility - -# First, identify bots with issues from Codex output -while IFS= read -r line; do - if echo "$line" | grep -qiE '\|[[:space:]]*ISSUES[[:space:]]*\|'; then - # Extract bot name from table row: | botname | ISSUES | summary | - BOT_WITH_ISSUE=$(echo "$line" | 
sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - if [[ -n "$BOT_WITH_ISSUE" ]]; then - _map_set "BOTS_WITH_ISSUES" "$BOT_WITH_ISSUE" "true" - fi - fi - if echo "$line" | grep -qiE '\|[[:space:]]*APPROVE[[:space:]]*\|'; then - # Extract bot name from table row: | botname | APPROVE | summary | - BOT_APPROVED=$(echo "$line" | sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - if [[ -n "$BOT_APPROVED" ]]; then - _map_set "BOTS_APPROVED" "$BOT_APPROVED" "true" - fi - fi -done <<< "$ISSUES_SECTION" - -# Process ALL configured bots (not just currently active) -# This allows re-adding previously approved bots if they post new issues -# Also handle timed-out bots by removing them from active_bots -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - # Check if bot timed out - remove from active_bots - if [[ "$(_map_get BOTS_TIMED_OUT "$bot")" == "true" ]]; then - echo "Removing '$bot' from active_bots (timed out after ${PR_POLL_TIMEOUT}s)" >&2 - continue # Don't add to NEW_ACTIVE_BOTS - fi - - if [[ "$(_map_get BOTS_WITH_ISSUES "$bot")" == "true" ]]; then - # Bot has issues - add to active list - if [[ "$(_map_get BOTS_APPROVED "$bot")" == "true" ]]; then - echo "Bot '$bot' was previously approved but has new issues - re-adding to active" >&2 - else - echo "Bot '$bot' has issues - keeping active" >&2 - fi - NEW_ACTIVE_BOTS+=("$bot") - elif [[ "$(_map_get BOTS_APPROVED "$bot")" == "true" ]]; then - # Bot approved with no new issues - remove from active - echo "Removing '$bot' from active_bots (approved)" >&2 - elif echo "$APPROVED_SECTION" | grep -qi "$bot"; then - # Bot mentioned in approved section - remove - echo "Removing '$bot' from active_bots (in approved section)" >&2 - else - # Bot not mentioned in ISSUES or APPROVE - check if was active - WAS_ACTIVE=false - for active_bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "$active_bot" ]]; then - WAS_ACTIVE=true - break - fi - done - if [[ "$WAS_ACTIVE" == "true" ]]; then - # Was active, not mentioned - keep active 
(NO_RESPONSE case) - echo "Bot '$bot' not mentioned - keeping active" >&2 - NEW_ACTIVE_BOTS+=("$bot") - fi - fi -done - -# Update state file with new active_bots and incremented round -TEMP_FILE="${STATE_FILE}.tmp.$$" - -# Build new YAML list for active_bots -NEW_ACTIVE_BOTS_YAML=$(build_yaml_list "${NEW_ACTIVE_BOTS[@]}") - -# ======================================== -# Update PR Goal Tracker -# ======================================== -# Extract issue counts from Codex output and update goal tracker -# Count issues by looking at the Issues Found section -ISSUES_FOUND_COUNT=0 -ISSUES_RESOLVED_COUNT=0 - -# Count issues in the "### Issues Found" section -if grep -q "### Issues Found" "$CHECK_FILE" 2>/dev/null; then - # Count list items: numbered (1., 2.) or bullet (-, *) in Issues Found section - # NOTE: Use awk for robust extraction (handles section at end of file) - ISSUES_FOUND_COUNT=$(awk '/^### Issues Found/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" \ - | grep -cE '^[0-9]+\.|^- |^\* ' 2>/dev/null || echo "0") -fi - -# Count resolved issues: issues are only resolved when ALL bots approve -# NOTE: If we reach this point, not all bots have approved (full APPROVE case -# already triggered early exit above), so issues found in this round are NOT -# resolved yet. Setting resolved=0 prevents inflating the resolved count when -# only some bots approve while others report issues. -# ISSUES_RESOLVED_COUNT stays 0 - issues will be marked resolved in a future -# round when all bots approve and the early exit path records the resolution. 
- -# Call update_pr_goal_tracker if goal tracker exists -if [[ -f "$GOAL_TRACKER_FILE" ]]; then - # NOTE: Use lowercase "codex" to match configured bot names and avoid duplicate rows - # (Codex itself writes rows with lowercase names in goal tracker) - BOT_RESULTS_JSON="{\"bot\": \"codex\", \"issues\": $ISSUES_FOUND_COUNT, \"resolved\": $ISSUES_RESOLVED_COUNT}" - update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$NEXT_ROUND" "$BOT_RESULTS_JSON" || true -fi - -# Build YAML list for configured_bots (never changes) -CONFIGURED_BOTS_YAML=$(build_yaml_list "${PR_CONFIGURED_BOTS_ARRAY[@]}") - -# Update latest_commit_sha to current HEAD (for force push detection in next round) -NEW_LATEST_COMMIT_SHA=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD 2>/dev/null) || NEW_LATEST_COMMIT_SHA="$PR_LATEST_COMMIT_SHA" -# NOTE: Sort by committedDate before selecting last - API order is not guaranteed -# NOTE: Uses PR_LOOKUP_REPO for fork PR support -NEW_LATEST_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || NEW_LATEST_COMMIT_AT="$PR_LATEST_COMMIT_AT" - -# Re-evaluate startup_case dynamically -# This allows case to change as bot comments arrive -BOTS_COMMA_LIST=$(IFS=','; echo "${PR_CONFIGURED_BOTS_ARRAY[*]}") -NEW_REVIEWER_STATUS=$("$PLUGIN_ROOT/scripts/check-pr-reviewer-status.sh" "$PR_NUMBER" --bots "$BOTS_COMMA_LIST" 2>/dev/null) || NEW_REVIEWER_STATUS="" -if [[ -n "$NEW_REVIEWER_STATUS" ]]; then - NEW_STARTUP_CASE=$(echo "$NEW_REVIEWER_STATUS" | jq -r '.case') - if [[ -n "$NEW_STARTUP_CASE" && "$NEW_STARTUP_CASE" != "null" ]]; then - if [[ "$NEW_STARTUP_CASE" != "${PR_STARTUP_CASE:-1}" ]]; then - echo "Startup case changed: ${PR_STARTUP_CASE:-1} -> $NEW_STARTUP_CASE" >&2 - fi - PR_STARTUP_CASE="$NEW_STARTUP_CASE" - fi -fi - -# Create updated state file (with last_trigger_at cleared - will be set when next @mention posted) -{ - echo "---" 
- echo "current_round: $NEXT_ROUND" - echo "max_iterations: $PR_MAX_ITERATIONS" - echo "pr_number: $PR_NUMBER" - echo "start_branch: $PR_START_BRANCH" - echo "configured_bots:${CONFIGURED_BOTS_YAML}" - echo "active_bots:${NEW_ACTIVE_BOTS_YAML}" - echo "codex_model: $PR_CODEX_MODEL" - echo "codex_effort: $PR_CODEX_EFFORT" - echo "codex_timeout: $PR_CODEX_TIMEOUT" - echo "poll_interval: $PR_POLL_INTERVAL" - echo "poll_timeout: $PR_POLL_TIMEOUT" - echo "started_at: $PR_STARTED_AT" - echo "startup_case: ${PR_STARTUP_CASE:-1}" - echo "latest_commit_sha: $NEW_LATEST_COMMIT_SHA" - echo "latest_commit_at: ${NEW_LATEST_COMMIT_AT:-}" - echo "last_trigger_at:" - echo "trigger_comment_id: ${PR_TRIGGER_COMMENT_ID:-}" - echo "---" -} > "$TEMP_FILE" -mv "$TEMP_FILE" "$STATE_FILE" - -# Check if all bots are now approved -if [[ ${#NEW_ACTIVE_BOTS[@]} -eq 0 ]]; then - echo "All bots have now approved! PR loop complete." >&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# ======================================== -# Issues Remaining - Continue Loop -# ======================================== - -# Build new bot mention string -NEW_BOT_MENTION_STRING=$(build_bot_mention_string "${NEW_ACTIVE_BOTS[@]}") - -# Create feedback file for next round -cat > "$FEEDBACK_FILE" << EOF -# PR Loop Feedback (Round $NEXT_ROUND) - -## Bot Review Analysis - -$CHECK_CONTENT - ---- - -## Your Task - -Address the issues identified above: - -1. Read and understand each issue -2. Make the necessary code changes -3. Commit and push your changes -4. Comment on the PR to trigger re-review: - \`\`\`bash - gh pr comment $PR_NUMBER --body "$NEW_BOT_MENTION_STRING please review the latest changes" - \`\`\` -5. 
Write your resolution summary to: $LOOP_DIR/round-${NEXT_ROUND}-pr-resolve.md - ---- - -**Remaining active bots:** $(IFS=', '; echo "${NEW_ACTIVE_BOTS[*]}") -**Round:** $NEXT_ROUND of $PR_MAX_ITERATIONS -EOF - -SYSTEM_MSG="PR Loop: Round $NEXT_ROUND/$PR_MAX_ITERATIONS - Bot reviews identified issues" - -jq -n \ - --arg reason "$(cat "$FEEDBACK_FILE")" \ - --arg msg "$SYSTEM_MSG" \ - '{ - "decision": "block", - "reason": $reason, - "systemMessage": $msg - }' - -exit 0 diff --git a/prompt-template/block/force-push-detected.md b/prompt-template/block/force-push-detected.md deleted file mode 100644 index b09feb50..00000000 --- a/prompt-template/block/force-push-detected.md +++ /dev/null @@ -1,17 +0,0 @@ -# Force Push Detected - -A **force push** has been detected on this PR. The commit SHA changed from `{{OLD_COMMIT}}` to `{{NEW_COMMIT}}` in a non-fast-forward manner. - -Force pushes reset the review state because the commit history has been rewritten. - -**Required Actions**: -1. The PR loop has updated its tracking to the new commit SHA -2. You must post a new trigger comment to restart the review cycle -3. Post a comment mentioning {{BOT_MENTION_STRING}} to trigger a new review - -**Example trigger comment**: -``` -{{BOT_MENTION_STRING}} Please review these changes. -``` - -After posting a trigger comment, you may attempt to continue. diff --git a/prompt-template/block/no-trigger-comment.md b/prompt-template/block/no-trigger-comment.md deleted file mode 100644 index 9f886f8e..00000000 --- a/prompt-template/block/no-trigger-comment.md +++ /dev/null @@ -1,17 +0,0 @@ -# No Trigger Comment Found - -The PR loop is waiting for a **trigger comment** before it can proceed. - -**Why is this required?** -- Startup case {{STARTUP_CASE}}: {{STARTUP_CASE_DESC}} -- Round {{CURRENT_ROUND}} requires explicit trigger to start the review cycle - -**Required Action**: -Post a comment on the PR mentioning {{BOT_MENTION_STRING}} to trigger a review. 
- -**Example trigger comment**: -``` -{{BOT_MENTION_STRING}} Please review these changes. -``` - -After posting a trigger comment, you may attempt to continue. diff --git a/prompt-template/block/pr-loop-prompt-write.md b/prompt-template/block/pr-loop-prompt-write.md deleted file mode 100644 index 625629ee..00000000 --- a/prompt-template/block/pr-loop-prompt-write.md +++ /dev/null @@ -1,9 +0,0 @@ -# PR Loop File Write Blocked - -You cannot write to `round-*-pr-comment.md` or `round-*-prompt.md` files in `.humanize/pr-loop/`. - -These files are generated by the PR loop system: -- `round-*-pr-comment.md`: Contains PR comments fetched from GitHub -- `round-*-prompt.md`: Contains instructions for the current round - -Both are read-only and managed by the system. diff --git a/prompt-template/block/pr-loop-state-modification.md b/prompt-template/block/pr-loop-state-modification.md deleted file mode 100644 index 5e17692d..00000000 --- a/prompt-template/block/pr-loop-state-modification.md +++ /dev/null @@ -1,12 +0,0 @@ -# PR Loop State File Modification Blocked - -You cannot modify `state.md` in `.humanize/pr-loop/`. This file is managed by the PR loop system. - -The state file contains: -- Current round number -- PR number and branch -- Active bots configuration -- Codex configuration -- Polling settings - -Modifying it would corrupt the PR loop state. diff --git a/prompt-template/pr-loop/codex-goal-tracker-update.md b/prompt-template/pr-loop/codex-goal-tracker-update.md deleted file mode 100644 index 65ba5379..00000000 --- a/prompt-template/pr-loop/codex-goal-tracker-update.md +++ /dev/null @@ -1,64 +0,0 @@ -## Goal Tracker Update Instructions - -After completing your analysis, update the goal tracker file at `{{GOAL_TRACKER_FILE}}`: - -### Required Updates - -1. **Add row to Issue Summary table:** - - Add a new row for this round with your review results - - Format: `| {{NEXT_ROUND}} | | | | |` - - Status should be: "Issues Found", "All Resolved", or "Approved" - -2. 
**Update Total Statistics section:** - - Increment `Total Issues Found` by number of new issues discovered - - Increment `Total Issues Resolved` by number of issues you verified as fixed - - Update `Remaining` to be (Total Found - Total Resolved) - -3. **Add Issue Log entry for this round:** - - Create heading: `### Round {{NEXT_ROUND}}` - - List each issue or approval with details - - Include reviewer name and brief description - -### Example Goal Tracker Update - -If bot "claude" reported 2 new issues and "codex" found 0 issues (approved): - -```markdown -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | -| 1 | claude | 2 | 0 | Issues Found | -| 1 | codex | 0 | 0 | Approved | - -## Total Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 0 -- Remaining: 2 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -Started: 2026-01-18T10:00:00Z -Startup Case: 1 - -### Round 1 -**claude** found 2 issues: -1. Missing error handling in auth.ts -2. Test coverage below 80% - -**codex** approved - no issues found. -``` - -### Important Rules - -- Keep the file structure intact -- Use proper markdown table formatting -- Only update the sections mentioned above (Issue Summary, Total Statistics, Issue Log) -- Do not modify the header sections (PR Information, Ultimate Goal) -- Add to existing tables, do not replace them -- Each reviewer gets a separate row in Issue Summary diff --git a/prompt-template/pr-loop/critical-requirements-has-comments.md b/prompt-template/pr-loop/critical-requirements-has-comments.md deleted file mode 100644 index 4855a306..00000000 --- a/prompt-template/pr-loop/critical-requirements-has-comments.md +++ /dev/null @@ -1,24 +0,0 @@ - -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. 
COMMIT and PUSH your changes: - - Create a commit with descriptive message - - Push to the remote repository - -2. Comment on the PR to trigger re-review: - gh pr comment {{PR_NUMBER}} --body "{{BOT_MENTION_STRING}} please review" - -3. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should include: - - Issues addressed - - Files modified - - Tests added (if any) - -The Stop Hook will then poll for bot reviews. -=========================================== diff --git a/prompt-template/pr-loop/critical-requirements-no-comments.md b/prompt-template/pr-loop/critical-requirements-no-comments.md deleted file mode 100644 index 1b043501..00000000 --- a/prompt-template/pr-loop/critical-requirements-no-comments.md +++ /dev/null @@ -1,21 +0,0 @@ - -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should note: - - This is Round 0 awaiting initial bot reviews - - No issues to address yet - -2. Try to exit - the Stop Hook will poll for bot reviews - -DO NOT comment on the PR to trigger review - the bots will -review automatically since this is a new PR. - -The Stop Hook will poll for bot reviews. -=========================================== diff --git a/prompt-template/pr-loop/goal-tracker-initial.md b/prompt-template/pr-loop/goal-tracker-initial.md deleted file mode 100644 index d95e9936..00000000 --- a/prompt-template/pr-loop/goal-tracker-initial.md +++ /dev/null @@ -1,33 +0,0 @@ -# PR Review Goal Tracker - -## PR Information - -- **PR Number:** #{{PR_NUMBER}} -- **Branch:** {{START_BRANCH}} -- **Started:** {{STARTED_AT}} -- **Monitored Bots:** {{ACTIVE_BOTS_DISPLAY}} -- **Startup Case:** {{STARTUP_CASE}} - -## Ultimate Goal - -Get all monitored bot reviewers ({{ACTIVE_BOTS_DISPLAY}}) to approve this PR. 
- -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -Started: {{STARTED_AT}} -Startup Case: {{STARTUP_CASE}} diff --git a/prompt-template/pr-loop/round-0-header.md b/prompt-template/pr-loop/round-0-header.md deleted file mode 100644 index e694ce41..00000000 --- a/prompt-template/pr-loop/round-0-header.md +++ /dev/null @@ -1,15 +0,0 @@ -Read and execute below with ultrathink - -## PR Review Loop (Round 0) - -You are in a PR review loop monitoring feedback from remote review bots. - -**PR Information:** -- PR Number: #{{PR_NUMBER}} -- Branch: {{START_BRANCH}} -- Active Bots: {{ACTIVE_BOTS_DISPLAY}} - -## Review Comments - -The following comments have been fetched from the PR: - diff --git a/prompt-template/pr-loop/round-0-task-has-comments.md b/prompt-template/pr-loop/round-0-task-has-comments.md deleted file mode 100644 index 37f3e9f1..00000000 --- a/prompt-template/pr-loop/round-0-task-has-comments.md +++ /dev/null @@ -1,43 +0,0 @@ - ---- - -## Your Task - -1. **Analyze the comments above**, prioritizing: - - Human comments first (they take precedence) - - Bot comments (newest first) - -2. **Fix any issues** identified by the reviewers: - - Read the relevant code files - - Make necessary changes - - Create appropriate tests if needed - -3. **After fixing issues**: - - Commit your changes with a descriptive message - - Push to the remote repository - - Comment on the PR to trigger re-review: - ```bash - gh pr comment {{PR_NUMBER}} --body "{{BOT_MENTION_STRING}} please review the latest changes" - ``` - -4. **Write your resolution summary** to: @{{RESOLVE_PATH}} - - List what issues were addressed - - Files modified - - Tests added (if any) - ---- - -## Important Rules - -1. 
**Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -2. **Always push changes**: Your fixes must be pushed for bots to review them -3. **Use the correct comment format**: Tag the bots to trigger their reviews -4. **Be thorough**: Address all valid concerns from the reviewers - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for new bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues remain, you will receive feedback and continue -4. If all bots approve, the loop ends diff --git a/prompt-template/pr-loop/round-0-task-no-comments.md b/prompt-template/pr-loop/round-0-task-no-comments.md deleted file mode 100644 index 6ed6d9a2..00000000 --- a/prompt-template/pr-loop/round-0-task-no-comments.md +++ /dev/null @@ -1,30 +0,0 @@ - ---- - -## Your Task - -This PR has no review comments yet. The monitored bots ({{ACTIVE_BOTS_DISPLAY}}) will automatically review the PR - you do NOT need to comment to trigger the first review. - -1. **Wait for automatic bot reviews**: - - Simply write your summary and try to exit - - The Stop Hook will poll for the first bot reviews - -2. **Write your initial summary** to: @{{RESOLVE_PATH}} - - Note that this is Round 0 awaiting initial bot reviews - - No issues to address yet - ---- - -## Important Rules - -1. **Do not comment to trigger review**: First reviews are automatic -2. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -3. **Trust the process**: The Stop Hook manages polling and Codex validation - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues are found, you will receive feedback and continue -4. 
If all bots approve, the loop ends diff --git a/scripts/cancel-pr-loop.sh b/scripts/cancel-pr-loop.sh deleted file mode 100755 index 388b536f..00000000 --- a/scripts/cancel-pr-loop.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env bash -# -# Cancel script for cancel-pr-loop -# -# Cancels an active PR loop by creating a cancel signal file -# and renaming the state file to cancel-state.md. -# -# Usage: -# cancel-pr-loop.sh [--force] -# -# Exit codes: -# 0 - Successfully cancelled -# 1 - No active loop found -# 2 - Reserved for future use (e.g., confirmation required) -# 3 - Other error -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -FORCE="false" - -while [[ $# -gt 0 ]]; do - case $1 in - --force) - FORCE="true" - shift - ;; - -h|--help) - cat << 'HELP_EOF' -cancel-pr-loop.sh - Cancel active PR loop - -USAGE: - cancel-pr-loop.sh [OPTIONS] - -OPTIONS: - --force Force cancel (currently has no additional effect) - -h, --help Show this help message - -EXIT CODES: - 0 - Successfully cancelled - 1 - No active loop found - 3 - Other error - -DESCRIPTION: - Cancels the active PR loop by: - 1. Finding the most recent PR loop directory - 2. Creating a .cancel-requested signal file - 3. Renaming state.md to cancel-state.md - -NOTE: - This command only affects PR loops (.humanize/pr-loop/). - RLCR loops (.humanize/rlcr/) are not affected. 
-HELP_EOF - exit 0 - ;; - *) - echo "Unknown option: $1" >&2 - echo "Use --help for usage information" >&2 - exit 3 - ;; - esac -done - -# ======================================== -# Find Loop Directory -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Find newest loop directory (different from RLCR - uses pr-loop instead of rlcr) -LOOP_DIR=$(ls -1d "$LOOP_BASE_DIR"/*/ 2>/dev/null | sort -r | head -1) || true - -if [[ -z "$LOOP_DIR" ]]; then - echo "NO_LOOP" - echo "No active PR loop found." - exit 1 -fi - -# ======================================== -# Check Loop State -# ======================================== - -STATE_FILE="$LOOP_DIR/state.md" -CANCEL_SIGNAL="$LOOP_DIR/.cancel-requested" - -if [[ -f "$STATE_FILE" ]]; then - LOOP_STATE="ACTIVE" - ACTIVE_STATE_FILE="$STATE_FILE" -else - echo "NO_ACTIVE_LOOP" - echo "No active PR loop found. The loop directory exists but no active state file is present." 
- exit 1 -fi - -# ======================================== -# Extract Round Info -# ======================================== - -# Extract current_round and max_iterations from the state file -CURRENT_ROUND=$(grep -E '^current_round:' "$ACTIVE_STATE_FILE" | sed 's/^current_round:[[:space:]]*//' | tr -d ' ') -MAX_ITERATIONS=$(grep -E '^max_iterations:' "$ACTIVE_STATE_FILE" | sed 's/^max_iterations:[[:space:]]*//' | tr -d ' ') -PR_NUMBER=$(grep -E '^pr_number:' "$ACTIVE_STATE_FILE" | sed 's/^pr_number:[[:space:]]*//' | tr -d ' ') - -# Default values if not found -CURRENT_ROUND=${CURRENT_ROUND:-"?"} -MAX_ITERATIONS=${MAX_ITERATIONS:-"?"} -PR_NUMBER=${PR_NUMBER:-"?"} - -# ======================================== -# Perform Cancellation -# ======================================== - -# Create cancel signal file -touch "$CANCEL_SIGNAL" - -# Rename state file to cancel-state.md -mv "$ACTIVE_STATE_FILE" "$LOOP_DIR/cancel-state.md" - -# ======================================== -# Output Result -# ======================================== - -echo "CANCELLED" -echo "Cancelled PR loop for PR #$PR_NUMBER (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." -echo "State preserved as cancel-state.md" - -exit 0 diff --git a/scripts/check-bot-reactions.sh b/scripts/check-bot-reactions.sh deleted file mode 100755 index e14861a6..00000000 --- a/scripts/check-bot-reactions.sh +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env bash -# -# Check bot reactions on PR or comments -# -# Detects: -# - Codex +1 (thumbs-up) reaction on PR body (first round approval) -# - Claude eyes reaction on trigger comments (confirmation of receipt) -# -# Usage: -# check-bot-reactions.sh codex-thumbsup [--after ] -# check-bot-reactions.sh claude-eyes [--retry ] [--delay ] -# -# Exit codes: -# 0 - Reaction found -# 1 - Reaction not found (or timeout after all retries) -# 2 - Error (API failure, missing arguments, etc.) 
- -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Timeout for gh operations -GH_TIMEOUT=30 - -# Default retry settings for claude eyes -DEFAULT_MAX_RETRIES=3 -DEFAULT_RETRY_DELAY=5 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# ======================================== -# Helper Functions -# ======================================== - -show_help() { - cat << 'EOF' -check-bot-reactions.sh - Detect bot reactions on GitHub PRs and comments - -USAGE: - check-bot-reactions.sh codex-thumbsup [--after ] - check-bot-reactions.sh claude-eyes [--retry ] [--delay ] - -COMMANDS: - codex-thumbsup Check for Codex +1 reaction on PR body - Returns reaction created_at timestamp if found - --after: Only count reaction if created after this timestamp - - claude-eyes Check for Claude eyes reaction on a specific comment - Retries with delay if not found immediately - --retry: Number of attempts (default: 3) - --delay: Seconds between attempts (default: 5) - -EXIT CODES: - 0 - Reaction found (outputs JSON with reaction info) - 1 - Reaction not found - 2 - Error (API failure, etc.) 
- -EXAMPLES: - # Check if Codex approved PR #123 with thumbs-up - check-bot-reactions.sh codex-thumbsup 123 - - # Check if Codex approved after loop started - check-bot-reactions.sh codex-thumbsup 123 --after "2026-01-18T10:00:00Z" - - # Wait for Claude eyes reaction on comment (15 seconds total) - check-bot-reactions.sh claude-eyes 12345678 --retry 3 --delay 5 -EOF - exit 0 -} - -# ======================================== -# Parse Arguments -# ======================================== - -COMMAND="${1:-}" -shift || true - -if [[ -z "$COMMAND" ]] || [[ "$COMMAND" == "-h" ]] || [[ "$COMMAND" == "--help" ]]; then - show_help -fi - -case "$COMMAND" in - codex-thumbsup) - # Parse codex-thumbsup arguments - PR_NUMBER="" - AFTER_TIMESTAMP="" - - while [[ $# -gt 0 ]]; do - case $1 in - --after) - AFTER_TIMESTAMP="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option for codex-thumbsup: $1" >&2 - exit 2 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Multiple PR numbers specified" >&2 - exit 2 - fi - shift - ;; - esac - done - - if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required for codex-thumbsup" >&2 - exit 2 - fi - - # IMPORTANT: Use the PR's base repository for API calls (for fork PR support) - # Reactions are on the base repo, not the fork - # Strategy: Try current repo first, check if PR exists there, then try parent repo for forks - - # Step 1: Get current repo - CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - - # Step 2: Determine the correct repo for PR operations - # Try current repo first - if PR exists there, use it - PR_BASE_REPO="" - if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi - fi - - # Step 3: If PR not found in current repo, try parent repo (fork case) - if 
[[ -z "$PR_BASE_REPO" ]]; then - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi - fi - - # Step 4: Final fallback to current repo - if [[ -z "$PR_BASE_REPO" ]]; then - PR_BASE_REPO="$CURRENT_REPO" - fi - - # Fetch PR reactions (with pagination to catch all reactions) - # The PR body is treated as issue #PR_NUMBER, so we use the issues reactions endpoint - # IMPORTANT: Use PR_BASE_REPO for fork PR support - # IMPORTANT: Use --paginate to fetch all reactions (default is 30 per page) - # NOTE: --paginate with --jq emits one array per page; use jq -s 'add' to merge them - REACTIONS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/reactions" \ - --paginate --jq '[.[] | {user: .user.login, content: .content, created_at: .created_at}]' 2>/dev/null \ - | jq -s 'add // []') || { - echo "Error: Failed to fetch PR reactions" >&2 - exit 2 - } - - # Look for Codex +1 reaction - # User login: chatgpt-codex-connector[bot] - CODEX_REACTION=$(echo "$REACTIONS" | jq -r ' - [.[] | select(.user == "chatgpt-codex-connector[bot]" and .content == "+1")] | .[0] // empty - ') - - if [[ "$CODEX_REACTION" == "null" ]] || [[ -z "$CODEX_REACTION" ]]; then - # No +1 reaction from Codex - exit 1 - fi - - REACTION_AT=$(echo "$CODEX_REACTION" | jq -r '.created_at') - - # If --after specified, check timestamp - if [[ -n "$AFTER_TIMESTAMP" ]]; then - if [[ "$REACTION_AT" < "$AFTER_TIMESTAMP" ]]; then - # Reaction exists but is older than specified timestamp - exit 1 - fi - fi - - # Output reaction info - echo "$CODEX_REACTION" - exit 0 - ;; - - claude-eyes) - # Parse claude-eyes arguments - COMMENT_ID="" - 
PR_NUMBER="" - MAX_RETRIES="$DEFAULT_MAX_RETRIES" - RETRY_DELAY="$DEFAULT_RETRY_DELAY" - - while [[ $# -gt 0 ]]; do - case $1 in - --retry) - MAX_RETRIES="$2" - shift 2 - ;; - --delay) - RETRY_DELAY="$2" - shift 2 - ;; - --pr) - PR_NUMBER="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option for claude-eyes: $1" >&2 - exit 2 - ;; - *) - if [[ -z "$COMMENT_ID" ]]; then - COMMENT_ID="$1" - else - echo "Error: Multiple comment IDs specified" >&2 - exit 2 - fi - shift - ;; - esac - done - - if [[ -z "$COMMENT_ID" ]]; then - echo "Error: Comment ID is required for claude-eyes" >&2 - exit 2 - fi - - # IMPORTANT: Use the PR's base repository for API calls (for fork PR support) - # Reactions are on the base repo, not the fork - # Strategy: Try current repo first, check if PR exists there, then try parent repo for forks - - # Step 1: Get current repo - CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - - # Step 2: Determine the correct repo for PR operations (if PR number provided) - PR_BASE_REPO="" - if [[ -n "$PR_NUMBER" && -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi - fi - - # Step 3: If PR not found in current repo and PR number provided, try parent repo (fork case) - if [[ -z "$PR_BASE_REPO" && -n "$PR_NUMBER" ]]; then - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi - fi - - # Step 4: Final fallback to current repo - if [[ -z "$PR_BASE_REPO" ]]; then - 
PR_BASE_REPO="$CURRENT_REPO" - fi - - # Retry loop for eyes reaction - for attempt in $(seq 1 "$MAX_RETRIES"); do - # Wait before checking (gives Claude time to react) - sleep "$RETRY_DELAY" - - # Fetch comment reactions (with pagination to catch all reactions) - # IMPORTANT: Use PR_BASE_REPO for fork PR support - # IMPORTANT: Use --paginate to fetch all reactions (default is 30 per page) - # NOTE: --paginate with --jq emits one array per page; use jq -s 'add' to merge them - REACTIONS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/comments/$COMMENT_ID/reactions" \ - --paginate --jq '[.[] | {user: .user.login, content: .content, created_at: .created_at}]' 2>/dev/null \ - | jq -s 'add // []') || { - # API error - continue to next attempt - continue - } - - # Look for Claude eyes reaction - # User login: claude[bot] - CLAUDE_REACTION=$(echo "$REACTIONS" | jq -r ' - [.[] | select(.user == "claude[bot]" and .content == "eyes")] | .[0] // empty - ') - - if [[ "$CLAUDE_REACTION" != "null" ]] && [[ -n "$CLAUDE_REACTION" ]]; then - # Found eyes reaction - echo "$CLAUDE_REACTION" - exit 0 - fi - - # Not found yet, will retry if attempts remain - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Attempt $attempt/$MAX_RETRIES: Eyes not found, retrying..." >&2 - fi - done - - # All attempts exhausted - echo "No eyes reaction found after $MAX_RETRIES attempts ($(( MAX_RETRIES * RETRY_DELAY )) seconds total)" >&2 - exit 1 - ;; - - *) - echo "Error: Unknown command: $COMMAND" >&2 - echo "Use --help for usage information" >&2 - exit 2 - ;; -esac diff --git a/scripts/check-pr-reviewer-status.sh b/scripts/check-pr-reviewer-status.sh deleted file mode 100755 index e4915c99..00000000 --- a/scripts/check-pr-reviewer-status.sh +++ /dev/null @@ -1,275 +0,0 @@ -#!/usr/bin/env bash -# -# Check PR reviewer status for startup case determination -# -# Analyzes reviewer comments on ENTIRE PR (not just after latest commit) -# to determine which startup case applies. 
-# -# Usage: -# check-pr-reviewer-status.sh --bots -# -# Output (JSON): -# { -# "case": 1-5, -# "reviewers_commented": ["claude"], -# "reviewers_missing": ["codex"], -# "latest_commit_sha": "abc123", -# "latest_commit_at": "2026-01-18T12:00:00Z", -# "newest_review_at": "2026-01-18T11:00:00Z", -# "has_commits_after_reviews": true -# } -# -# Cases: -# 1 - No reviewer comments at all -# 2 - Some (not all) reviewers commented -# 3 - All reviewers commented, no new commits after -# 4 - All reviewers commented, new commits after (needs re-review) -# 5 - All reviewers commented, new commits after (like case 4, for future distinction) - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Timeout for gh operations -GH_TIMEOUT=60 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -BOT_LIST="" - -while [[ $# -gt 0 ]]; do - case $1 in - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - BOT_LIST="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Multiple PR numbers specified" >&2 - exit 1 - fi - shift - ;; - esac -done - -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - echo "Usage: check-pr-reviewer-status.sh --bots " >&2 - exit 1 -fi - -if [[ -z "$BOT_LIST" ]]; then - echo "Error: --bots is required" >&2 - echo "Usage: check-pr-reviewer-status.sh --bots " >&2 - exit 1 -fi - -# ======================================== -# Bot Name Mapping -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> 
chatgpt-codex-connector[bot] -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# ======================================== -# Fetch PR Data -# ======================================== - -# Parse bot list into array -IFS=',' read -ra BOTS <<< "$BOT_LIST" - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Warning: Could not resolve PR base repository, using current repo" >&2 - PR_BASE_REPO="$CURRENT_REPO" -fi - -# Get latest commit info (use --repo for fork support) -COMMIT_INFO=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_BASE_REPO" \ - 
--json headRefOid,commits \ - --jq '{sha: .headRefOid, date: (.commits | sort_by(.committedDate) | last | .committedDate)}' 2>/dev/null) || { - echo "Error: Failed to fetch PR commit info" >&2 - exit 1 -} - -LATEST_COMMIT_SHA=$(echo "$COMMIT_INFO" | jq -r '.sha') -LATEST_COMMIT_AT=$(echo "$COMMIT_INFO" | jq -r '.date') - -# Fetch all comments (issue comments, review comments, and PR review submissions) -# Using --paginate to handle PRs with many comments -# IMPORTANT: Use PR_BASE_REPO for fork PR support -ISSUE_COMMENTS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .created_at, body: .body}]' 2>/dev/null) || ISSUE_COMMENTS="[]" - -REVIEW_COMMENTS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/pulls/$PR_NUMBER/comments" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .created_at, body: .body}]' 2>/dev/null) || REVIEW_COMMENTS="[]" - -# Also fetch PR review submissions (APPROVE, REQUEST_CHANGES, COMMENT reviews) -# These are different from inline review comments and may be the only feedback from some bots -PR_REVIEWS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/pulls/$PR_NUMBER/reviews" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .submitted_at, body: .body, state: .state}]' 2>/dev/null) || PR_REVIEWS="[]" - -# Combine all comments and reviews -ALL_COMMENTS=$(echo "$ISSUE_COMMENTS $REVIEW_COMMENTS $PR_REVIEWS" | jq -s 'add // []') - -# ======================================== -# Analyze Comments by Bot -# ======================================== - -declare -a REVIEWERS_COMMENTED=() -declare -a REVIEWERS_MISSING=() -declare -a REVIEWERS_STALE=() # Bots whose latest review is before latest commit -NEWEST_REVIEW_AT="" - -for bot in "${BOTS[@]}"; do - author=$(map_bot_to_author "$bot") - - # Check if this bot has any comments - BOT_COMMENTS=$(echo "$ALL_COMMENTS" | jq --arg author "$author" '[.[] | 
select(.author == $author)]') - BOT_COUNT=$(echo "$BOT_COMMENTS" | jq 'length') - - if [[ "$BOT_COUNT" -gt 0 ]]; then - REVIEWERS_COMMENTED+=("$bot") - - # Track this bot's newest review timestamp - BOT_NEWEST=$(echo "$BOT_COMMENTS" | jq -r 'sort_by(.created_at) | reverse | .[0].created_at') - - # Check if this bot's review is stale (before latest commit) - # This is per-bot, not global - a bot's review can be stale even if another bot reviewed later - if [[ -n "$LATEST_COMMIT_AT" && -n "$BOT_NEWEST" && "$LATEST_COMMIT_AT" > "$BOT_NEWEST" ]]; then - REVIEWERS_STALE+=("$bot") - fi - - # Track global newest for output (still useful for debugging) - if [[ -z "$NEWEST_REVIEW_AT" ]] || [[ "$BOT_NEWEST" > "$NEWEST_REVIEW_AT" ]]; then - NEWEST_REVIEW_AT="$BOT_NEWEST" - fi - else - REVIEWERS_MISSING+=("$bot") - fi -done - -# ======================================== -# Determine Case -# ======================================== - -CASE=0 -HAS_COMMITS_AFTER_REVIEWS=false - -# Count how many bots have commented -COMMENTED_COUNT=${#REVIEWERS_COMMENTED[@]} -MISSING_COUNT=${#REVIEWERS_MISSING[@]} -STALE_COUNT=${#REVIEWERS_STALE[@]} -TOTAL_BOTS=${#BOTS[@]} - -if [[ $COMMENTED_COUNT -eq 0 ]]; then - # Case 1: No reviewer comments at all - CASE=1 -elif [[ $MISSING_COUNT -gt 0 ]]; then - # Some (not all) reviewers commented - # Check if ANY bot that commented has a stale review (per-bot check) - if [[ $STALE_COUNT -gt 0 ]]; then - # Case 5: Some reviewers commented, but at least one has stale review - HAS_COMMITS_AFTER_REVIEWS=true - CASE=5 - else - # Case 2: Some reviewers commented, all reviews are fresh - CASE=2 - fi -else - # All reviewers have commented - # Check if ANY bot has a stale review (per-bot check, not global newest) - if [[ $STALE_COUNT -gt 0 ]]; then - # Case 4: All reviewers commented, but at least one has stale review - HAS_COMMITS_AFTER_REVIEWS=true - CASE=4 - else - # Case 3: All commented, all reviews are fresh - CASE=3 - fi -fi - -# 
======================================== -# Output JSON -# ======================================== - -# Build JSON arrays -COMMENTED_JSON=$(printf '%s\n' "${REVIEWERS_COMMENTED[@]}" | jq -R . | jq -s .) -MISSING_JSON=$(printf '%s\n' "${REVIEWERS_MISSING[@]}" | jq -R . | jq -s .) - -# Handle empty arrays -[[ ${#REVIEWERS_COMMENTED[@]} -eq 0 ]] && COMMENTED_JSON="[]" -[[ ${#REVIEWERS_MISSING[@]} -eq 0 ]] && MISSING_JSON="[]" - -jq -n \ - --argjson case "$CASE" \ - --argjson reviewers_commented "$COMMENTED_JSON" \ - --argjson reviewers_missing "$MISSING_JSON" \ - --arg latest_commit_sha "$LATEST_COMMIT_SHA" \ - --arg latest_commit_at "$LATEST_COMMIT_AT" \ - --arg newest_review_at "${NEWEST_REVIEW_AT:-null}" \ - --argjson has_commits_after_reviews "$HAS_COMMITS_AFTER_REVIEWS" \ - '{ - case: $case, - reviewers_commented: $reviewers_commented, - reviewers_missing: $reviewers_missing, - latest_commit_sha: $latest_commit_sha, - latest_commit_at: $latest_commit_at, - newest_review_at: (if $newest_review_at == "null" then null else $newest_review_at end), - has_commits_after_reviews: $has_commits_after_reviews - }' diff --git a/scripts/fetch-pr-comments.sh b/scripts/fetch-pr-comments.sh deleted file mode 100755 index b4e892b1..00000000 --- a/scripts/fetch-pr-comments.sh +++ /dev/null @@ -1,452 +0,0 @@ -#!/usr/bin/env bash -# -# Fetch PR comments from GitHub -# -# Fetches all types of PR comments: -# - Issue comments (general comments on the PR) -# - Review comments (inline code comments) -# - PR reviews (summary reviews with approval/rejection status) -# -# Usage: -# fetch-pr-comments.sh [--after ] -# -# Output: Formatted markdown file with all comments -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -OUTPUT_FILE="" -AFTER_TIMESTAMP="" -ACTIVE_BOTS="" # Comma-separated list of active bots for grouping - -while [[ $# -gt 0 ]]; do - case $1 in - --after) - if [[ -z "${2:-}" 
]]; then - echo "Error: --after requires a timestamp argument" >&2 - exit 1 - fi - AFTER_TIMESTAMP="$2" - shift 2 - ;; - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - ACTIVE_BOTS="$2" - shift 2 - ;; - -h|--help) - cat << 'HELP_EOF' -fetch-pr-comments.sh - Fetch PR comments from GitHub - -USAGE: - fetch-pr-comments.sh [OPTIONS] - -ARGUMENTS: - The PR number to fetch comments from - Path to write the formatted comments - -OPTIONS: - --after Only include comments after this ISO 8601 timestamp - --bots Comma-separated list of active bots for grouping - -h, --help Show this help message - -OUTPUT FORMAT: - The output file contains markdown-formatted comments with: - - Comment type (issue comment, review comment, PR review) - - Author (with [bot] indicator for bot accounts) - - Timestamp - - Content - - Comments are deduplicated by ID and sorted newest first. - Human comments come before bot comments. - If --bots is provided, bot comments are grouped by bot. -HELP_EOF - exit 0 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - elif [[ -z "$OUTPUT_FILE" ]]; then - OUTPUT_FILE="$1" - else - echo "Error: Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac -done - -# Validate arguments -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - exit 1 -fi - -if [[ -z "$OUTPUT_FILE" ]]; then - echo "Error: Output file is required" >&2 - exit 1 -fi - -if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number: $PR_NUMBER" >&2 - exit 1 -fi - -# ======================================== -# Check Prerequisites -# ======================================== - -if ! command -v gh &>/dev/null; then - echo "Error: GitHub CLI (gh) is required" >&2 - exit 1 -fi - -if ! 
command -v jq &>/dev/null; then - echo "Error: jq is required for JSON parsing" >&2 - exit 1 -fi - -# ======================================== -# Get Repository Info -# ======================================== - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(gh repo view --json owner,name -q '.owner.login + "/" + .name' 2>/dev/null) || { - echo "Error: Failed to get current repository" >&2 - exit 1 -} - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" -else - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(gh repo view --json parent -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Error: Failed to find PR #$PR_NUMBER in current or parent repository" >&2 - exit 1 -fi - -REPO_OWNER="${PR_BASE_REPO%%/*}" -REPO_NAME="${PR_BASE_REPO##*/}" - -if [[ -z "$REPO_OWNER" || -z "$REPO_NAME" ]]; then - echo "Error: Could not parse repository owner/name from: $PR_BASE_REPO" >&2 - exit 1 -fi - -# ======================================== -# Fetch Comments -# ======================================== - -# Create temporary files for each comment type -TEMP_DIR=$(mktemp -d) -trap 'rm -rf "$TEMP_DIR"' EXIT - 
-ISSUE_COMMENTS_FILE="$TEMP_DIR/issue_comments.json" -REVIEW_COMMENTS_FILE="$TEMP_DIR/review_comments.json" -PR_REVIEWS_FILE="$TEMP_DIR/pr_reviews.json" - -# Retry configuration -MAX_RETRIES=3 -RETRY_DELAY=2 - -# Track API failures for strict mode -API_FAILURES=0 - -# Function to fetch with retries -fetch_with_retry() { - local endpoint="$1" - local output_file="$2" - local description="$3" - local attempt=1 - - while [[ $attempt -le $MAX_RETRIES ]]; do - if gh api "$endpoint" --paginate > "$output_file" 2>/dev/null; then - return 0 - fi - - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Warning: Failed to fetch $description (attempt $attempt/$MAX_RETRIES), retrying in ${RETRY_DELAY}s..." >&2 - sleep "$RETRY_DELAY" - else - echo "ERROR: Failed to fetch $description after $MAX_RETRIES attempts" >&2 - echo "[]" > "$output_file" - API_FAILURES=$((API_FAILURES + 1)) - # Return 0 so script continues under set -euo pipefail - # API_FAILURES counter tracks failures for strict mode if needed - return 0 - fi - ((attempt++)) - done -} - -# Fetch issue comments (general PR comments) -# claude[bot] typically posts here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/issues/$PR_NUMBER/comments" "$ISSUE_COMMENTS_FILE" "issue comments" - -# Fetch PR review comments (inline code comments) -# codex (chatgpt-codex-connector[bot]) typically posts inline comments here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/comments" "$REVIEW_COMMENTS_FILE" "PR review comments" - -# Fetch PR reviews (summary reviews with approval status) -# Both bots may post summary reviews here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/reviews" "$PR_REVIEWS_FILE" "PR reviews" - -# ======================================== -# Process and Format Comments -# ======================================== - -# Function to check if user is a bot -is_bot() { - local user_type="$1" - local user_login="$2" - - if [[ "$user_type" == "Bot" ]] || [[ "$user_login" == *"[bot]" ]]; then 
- echo "true" - else - echo "false" - fi -} - -# Function to format timestamp for comparison -format_timestamp() { - local ts="$1" - # Remove trailing Z and convert to comparable format - echo "$ts" | sed 's/Z$//' | tr 'T' ' ' -} - -# Initialize output file -cat > "$OUTPUT_FILE" << EOF -# PR Comments for #$PR_NUMBER - -Fetched at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -Repository: $REPO_OWNER/$REPO_NAME - ---- - -EOF - -# Process all comments into a unified format -# Create a combined JSON with all comments -ALL_COMMENTS_FILE="$TEMP_DIR/all_comments.json" - -# Process issue comments -jq -r --arg type "issue_comment" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - updated_at: .updated_at, - body: .body, - path: null, - line: null, - state: null - } - else - empty - end -' "$ISSUE_COMMENTS_FILE" > "$TEMP_DIR/issue_processed.jsonl" 2>/dev/null || true - -# Process review comments (inline) -jq -r --arg type "review_comment" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - updated_at: .updated_at, - body: .body, - path: .path, - line: (.line // .original_line), - state: null - } - else - empty - end -' "$REVIEW_COMMENTS_FILE" > "$TEMP_DIR/review_processed.jsonl" 2>/dev/null || true - -# Process PR reviews -# Note: Include all reviews, even those with empty body (e.g. 
approval-only reviews) -# For empty body reviews, use a placeholder indicating the state -jq -r --arg type "pr_review" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .submitted_at, - updated_at: .submitted_at, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end), - path: null, - line: null, - state: .state - } - else - empty - end -' "$PR_REVIEWS_FILE" > "$TEMP_DIR/reviews_processed.jsonl" 2>/dev/null || true - -# Combine all processed comments and deduplicate by id -cat "$TEMP_DIR/issue_processed.jsonl" "$TEMP_DIR/review_processed.jsonl" "$TEMP_DIR/reviews_processed.jsonl" 2>/dev/null | \ - jq -s 'unique_by(.id)' > "$ALL_COMMENTS_FILE" - -# Filter by timestamp if provided -if [[ -n "$AFTER_TIMESTAMP" ]]; then - jq --arg after "$AFTER_TIMESTAMP" ' - [.[] | select(.created_at > $after)] - ' "$ALL_COMMENTS_FILE" > "$TEMP_DIR/filtered.json" - mv "$TEMP_DIR/filtered.json" "$ALL_COMMENTS_FILE" -fi - -# Sort: human comments first, then by timestamp (newest first) -# Uses fromdateiso8601 for proper ISO 8601 timestamp parsing -# Filter out entries with null created_at to avoid fromdateiso8601 errors -jq ' - [.[] | select(.created_at != null)] | - sort_by( - (if .author_type == "Bot" or (.author | test("\\[bot\\]$")) then 1 else 0 end), - -(.created_at | fromdateiso8601) - ) -' "$ALL_COMMENTS_FILE" > "$TEMP_DIR/sorted.json" - -# Format comments into markdown -COMMENT_COUNT=$(jq 'length' "$TEMP_DIR/sorted.json") - -if [[ "$COMMENT_COUNT" == "0" ]]; then - cat >> "$OUTPUT_FILE" << EOF -*No comments found.* - ---- - -This PR has no review comments yet from the monitored bots. 
-EOF -else - # Add section headers - echo "## Human Comments" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - # First pass: human comments - HUMAN_COMMENTS=$(jq -r ' - .[] | select(.author_type != "Bot" and (.author | test("\\[bot\\]$") | not)) | - "### Comment from \(.author)\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' "$TEMP_DIR/sorted.json" 2>/dev/null || true) - - if [[ -n "$HUMAN_COMMENTS" ]]; then - echo "$HUMAN_COMMENTS" >> "$OUTPUT_FILE" - else - echo "*No human comments.*" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - fi - - echo "" >> "$OUTPUT_FILE" - - # Second pass: bot comments - if [[ -n "$ACTIVE_BOTS" ]]; then - # Map bot names to GitHub comment author names: - # - claude -> claude[bot] - # - codex -> chatgpt-codex-connector[bot] - map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac - } - - # Group bot comments by active bots - echo "## Bot Comments (Grouped by Bot)" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - IFS=',' read -ra BOT_ARRAY <<< "$ACTIVE_BOTS" - for bot in "${BOT_ARRAY[@]}"; do - bot=$(echo "$bot" | tr -d ' ') - author=$(map_bot_to_author "$bot") - echo "### Comments from ${author}" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - BOT_COMMENTS=$(jq -r --arg author "$author" ' - [.[] | select(.author == $author)] | - if length == 0 then - "*No comments from this bot.*\n" - else - .[] | - "#### Comment\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - end - ' "$TEMP_DIR/sorted.json" 
2>/dev/null || echo "*Error reading comments.*") - - echo "$BOT_COMMENTS" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - done - else - # Default: all bot comments together - echo "## Bot Comments" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - jq -r ' - .[] | select(.author_type == "Bot" or (.author | test("\\[bot\\]$"))) | - "### Comment from \(.author)\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' "$TEMP_DIR/sorted.json" >> "$OUTPUT_FILE" 2>/dev/null || true - fi -fi - -echo "" >> "$OUTPUT_FILE" -echo "---" >> "$OUTPUT_FILE" -echo "" >> "$OUTPUT_FILE" -echo "*End of comments*" >> "$OUTPUT_FILE" - -# Report API failures (non-fatal but logged) -if [[ $API_FAILURES -gt 0 ]]; then - echo "WARNING: $API_FAILURES API endpoint(s) failed after retries. Some comments may be missing." >&2 - echo "" >> "$OUTPUT_FILE" - echo "**Warning:** Some API calls failed. Comments may be incomplete." 
>> "$OUTPUT_FILE" -fi - -exit 0 diff --git a/scripts/humanize.sh b/scripts/humanize.sh index c5ac3f20..346c1802 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1176,9 +1176,6 @@ humanize() { rlcr) _humanize_monitor_codex "$@" ;; - pr) - _humanize_monitor_pr "$@" - ;; skill) _humanize_monitor_skill "$@" ;; @@ -1189,11 +1186,10 @@ humanize() { _humanize_monitor_skill --tool-filter gemini "$@" ;; *) - echo "Usage: humanize monitor " + echo "Usage: humanize monitor " echo "" echo "Subcommands:" echo " rlcr Monitor the latest RLCR loop log from .humanize/rlcr" - echo " pr Monitor the latest PR loop from .humanize/pr-loop" echo " skill Monitor all skill invocations (codex + gemini)" echo " codex Monitor ask-codex skill invocations only" echo " gemini Monitor ask-gemini skill invocations only" @@ -1212,7 +1208,6 @@ humanize() { echo "" echo "Commands:" echo " monitor rlcr Monitor the latest RLCR loop log" - echo " monitor pr Monitor the latest PR loop" echo " monitor skill Monitor all skill invocations (codex + gemini)" echo " monitor codex Monitor ask-codex skill invocations only" echo " monitor gemini Monitor ask-gemini skill invocations only" @@ -1221,442 +1216,6 @@ humanize() { esac } -# ======================================== -# PR Loop Monitor Function -# ======================================== - -# Monitor the latest PR loop from .humanize/pr-loop with fixed status bar and rolling tail -_humanize_monitor_pr() { - # Enable 0-indexed arrays in zsh for bash compatibility - [[ -n "${ZSH_VERSION:-}" ]] && setopt localoptions ksharrays - - local loop_dir=".humanize/pr-loop" - local current_file="" - local current_session_dir="" - local check_interval=2 # seconds between checking for new files - local status_bar_height=10 # number of lines for status bar - local once_mode=false - - # Parse arguments - while [[ $# -gt 0 ]]; do - case "$1" in - --once) - once_mode=true - shift - ;; - *) - shift - ;; - esac - done - - # Check if .humanize/pr-loop exists - if 
[[ ! -d "$loop_dir" ]]; then - echo "Error: $loop_dir directory not found in current directory" - echo "Are you in a project with an active PR loop?" - return 1 - fi - - # Use shared monitor helper for finding latest session - _pr_find_latest_session() { - monitor_find_latest_session "$loop_dir" - } - - # Function to find the latest monitorable file (pr-check, pr-feedback, or pr-comment) - _pr_find_latest_file() { - local session_dir="$1" - [[ ! -d "$session_dir" ]] && return - - local latest="" - local latest_mtime=0 - - # Check for pr-check files (Codex analysis output) - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! -f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-check.md' -type f 2>/dev/null) - - # Check for pr-feedback files - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! -f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-feedback.md' -type f 2>/dev/null) - - # Check for pr-comment files - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! 
-f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-comment.md' -type f 2>/dev/null) - - echo "$latest" - } - - # Use shared monitor helper for finding state file - # Note: monitor_find_state_file returns "approve" not "approved" for approve-state.md - # so we maintain the PR-specific status mapping here for display purposes - _pr_find_state_file() { - local session_dir="$1" - local result - result=$(monitor_find_state_file "$session_dir") - local state_file="${result%|*}" - local stop_reason="${result#*|}" - - # Map stop reasons to PR-friendly status names - case "$stop_reason" in - approve) stop_reason="approved" ;; - maxiter) stop_reason="max-iterations" ;; - esac - - echo "$state_file|$stop_reason" - } - - # Function to parse state.md and return key values - _pr_parse_state_md() { - local state_file="$1" - [[ ! 
-f "$state_file" ]] && echo "0|42|?|?|?|?|N/A" && return - - local frontmatter - frontmatter=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$state_file" 2>/dev/null || echo "") - - local current_round=$(echo "$frontmatter" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ') - local max_iterations=$(echo "$frontmatter" | grep "^max_iterations:" | sed "s/max_iterations: *//" | tr -d ' ') - local pr_number=$(echo "$frontmatter" | grep "^pr_number:" | sed "s/pr_number: *//" | tr -d ' ') - local start_branch=$(echo "$frontmatter" | grep "^start_branch:" | sed "s/start_branch: *//" | tr -d '"' || true) - local configured_bots=$(echo "$frontmatter" | sed -n '/^configured_bots:$/,/^[a-z_]*:/{ /^ - /{ s/^ - //; p; } }' | tr '\n' ',' | sed 's/,$//') - local active_bots=$(echo "$frontmatter" | sed -n '/^active_bots:$/,/^[a-z_]*:/{ /^ - /{ s/^ - //; p; } }' | tr '\n' ',' | sed 's/,$//') - local codex_model=$(echo "$frontmatter" | grep "^codex_model:" | sed "s/codex_model: *//" | tr -d ' ') - local codex_effort=$(echo "$frontmatter" | grep "^codex_effort:" | sed "s/codex_effort: *//" | tr -d ' ') - local started_at=$(echo "$frontmatter" | grep "^started_at:" | sed "s/started_at: *//" || true) - - # Apply defaults - current_round=${current_round:-0} - max_iterations=${max_iterations:-42} - pr_number=${pr_number:-"?"} - start_branch=${start_branch:-"?"} - configured_bots=${configured_bots:-"none"} - active_bots=${active_bots:-"none"} - codex_model=${codex_model:-"$DEFAULT_CODEX_MODEL"} - codex_effort=${codex_effort:-"medium"} - started_at=${started_at:-"N/A"} - - echo "$current_round|$max_iterations|$pr_number|$start_branch|$configured_bots|$active_bots|$codex_model|$codex_effort|$started_at" - } - - # Draw the status bar at the top - _pr_draw_status_bar() { - local session_dir="$1" - local monitored_file="$2" - local loop_status="$3" - local term_width=$(tput cols) - - # Parse state file - local state_info=$(_pr_find_state_file "$session_dir") - local 
state_file="${state_info%|*}" - [[ -z "$loop_status" ]] && loop_status="${state_info#*|}" - - local state_values=$(_pr_parse_state_md "$state_file") - IFS='|' read -r current_round max_iterations pr_number start_branch configured_bots active_bots codex_model codex_effort started_at <<< "$state_values" - - # Save cursor position and move to top - tput sc - - # ANSI color codes - local green="\033[1;32m" yellow="\033[1;33m" cyan="\033[1;36m" - local magenta="\033[1;35m" red="\033[1;31m" reset="\033[0m" - local bg="\033[44m" bold="\033[1m" dim="\033[2m" - local clr_eol="\033[K" # Clear to end of line (reduces flicker vs clearing entire area) - - # Move to top and draw directly (no pre-clearing to avoid flicker) - tput cup 0 0 - local session_basename=$(basename "$session_dir") - printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" " PR Loop Monitor" - printf "${cyan}Session:${reset} %s ${cyan}PR:${reset} #%s ${cyan}Branch:${reset} %s${clr_eol}\n" "$session_basename" "$pr_number" "$start_branch" - printf "${green}Round:${reset} ${bold}%s${reset} / %s ${yellow}Codex:${reset} %s (%s)${clr_eol}\n" "$current_round" "$max_iterations" "$codex_model" "$codex_effort" - - # Detect phase and determine status color - local phase="" - local phase_display="" - if type get_pr_loop_phase &>/dev/null; then - phase=$(get_pr_loop_phase "$session_dir") - phase_display=$(get_pr_loop_phase_display "$phase" "$active_bots") - fi - - # Loop status line with color based on phase/status - local status_color="${green}" - case "$phase" in - approved) status_color="${cyan}" ;; - cancelled) status_color="${yellow}" ;; - maxiter) status_color="${red}" ;; - codex_analyzing) status_color="${magenta}" ;; - waiting_initial_review) status_color="${yellow}" ;; - waiting_reviewer) status_color="${green}" ;; - *) status_color="${dim}" ;; - esac - - if [[ -n "$phase_display" ]]; then - printf "${magenta}Phase:${reset} ${status_color}%s${reset}${clr_eol}\n" "$phase_display" - else - # Fallback to 
loop_status if phase detection not available - case "$loop_status" in - active) status_color="${green}" ;; - approved|completed) status_color="${cyan}" ;; - cancelled) status_color="${yellow}" ;; - max-iterations) status_color="${red}" ;; - *) status_color="${dim}" ;; - esac - printf "${magenta}Status:${reset} ${status_color}%s${reset}${clr_eol}\n" "$loop_status" - fi - - # Bot status - printf "${cyan}Configured Bots:${reset} %s${clr_eol}\n" "$configured_bots" - if [[ "$active_bots" == "none" ]] || [[ -z "$active_bots" ]]; then - printf "${green}Active Bots:${reset} ${green}all approved${reset}${clr_eol}\n" - else - printf "${yellow}Active Bots:${reset} %s${clr_eol}\n" "$active_bots" - fi - - # Goal tracker issue stats - local goal_tracker_file="$session_dir/goal-tracker.md" - if [[ -f "$goal_tracker_file" ]] && type humanize_parse_pr_goal_tracker &>/dev/null; then - local tracker_stats=$(humanize_parse_pr_goal_tracker "$goal_tracker_file") - local total_issues resolved_issues remaining_issues last_reviewer - IFS='|' read -r total_issues resolved_issues remaining_issues last_reviewer <<< "$tracker_stats" - if [[ "$total_issues" != "0" ]] || [[ "$resolved_issues" != "0" ]]; then - printf "${cyan}Issues:${reset} Found: ${yellow}%s${reset}, Resolved: ${green}%s${reset}, Remaining: ${red}%s${reset}${clr_eol}\n" "$total_issues" "$resolved_issues" "$remaining_issues" - fi - fi - - # Started time - local start_display="$started_at" - if [[ "$started_at" != "N/A" ]]; then - start_display=$(echo "$started_at" | sed 's/T/ /; s/Z/ UTC/') - fi - printf "${dim}Started:${reset} %s${clr_eol}\n" "$start_display" - - # Currently monitoring - local file_basename="" - [[ -n "$monitored_file" ]] && file_basename=$(basename "$monitored_file") - printf "${dim}Watching:${reset} %s${clr_eol}\n" "${file_basename:-none}" - - # Separator - printf "%-${term_width}s${clr_eol}\n" "$(printf '%*s' "$term_width" | tr ' ' '-')" - - # Restore cursor position - tput rc - } - - # Track state for 
cleanup - local TAIL_PID="" - local monitor_running=true - local cleanup_done=false - - # Cleanup function - called by trap - # Must work cleanly in both bash and zsh - _pr_cleanup() { - # Prevent multiple cleanup calls - [[ "${cleanup_done:-false}" == "true" ]] && return - cleanup_done=true - monitor_running=false - - # Reset traps to prevent re-triggering - trap - INT TERM EXIT 2>/dev/null || true - - # Kill background tail if running - if [[ -n "${TAIL_PID:-}" ]]; then - if kill -0 "$TAIL_PID" 2>/dev/null; then - kill "$TAIL_PID" 2>/dev/null || true - # Use timeout-safe wait - ( wait "$TAIL_PID" 2>/dev/null ) & - wait $! 2>/dev/null || true - fi - fi - - # Show cursor and restore terminal - tput cnorm 2>/dev/null || true - tput rmcup 2>/dev/null || true - echo "" - echo "Monitor stopped." - } - - # Set up signal handlers (bash/zsh compatible) - # Use TRAPINT/TRAPTERM for zsh, standard trap for bash - if [[ -n "${ZSH_VERSION:-}" ]]; then - # zsh: use TRAPINT and TRAPTERM for better handling - TRAPINT() { _pr_cleanup; return 130; } - TRAPTERM() { _pr_cleanup; return 143; } - # Also set EXIT trap for clean exit - trap '_pr_cleanup' EXIT - else - # bash: use standard trap - trap '_pr_cleanup' EXIT INT TERM - fi - - # One-shot mode: print status once and exit (for testing and scripting) - if [[ "$once_mode" == "true" ]]; then - local session_dir=$(_pr_find_latest_session) - if [[ -z "$session_dir" ]]; then - echo "No PR loop sessions found in $loop_dir" - return 1 - fi - - local state_info=$(_pr_find_state_file "$session_dir") - local state_file="${state_info%|*}" - local loop_status="${state_info#*|}" - - if [[ -z "$state_file" ]]; then - echo "No state file found in $session_dir" - return 1 - fi - - local state_values=$(_pr_parse_state_md "$state_file") - IFS='|' read -r current_round max_iterations pr_number start_branch configured_bots active_bots codex_model codex_effort started_at <<< "$state_values" - - # Get phase for --once mode display - local phase="" - 
local phase_display="" - if declare -f get_pr_loop_phase &>/dev/null; then - phase=$(get_pr_loop_phase "$session_dir") - phase_display=$(get_pr_loop_phase_display "$phase" "$active_bots") - fi - - echo "==========================================" - echo " PR Loop Monitor" - echo "==========================================" - echo "" - echo "Session: $(basename "$session_dir")" - if [[ -n "$phase_display" ]]; then - echo "Phase: $phase_display" - else - echo "Status: $loop_status" - fi - echo "" - echo "PR Number: #$pr_number" - echo "Branch: $start_branch" - echo "Configured Bots: ${configured_bots:-none}" - echo "Active Bots: ${active_bots:-none}" - echo "" - echo "Round: $current_round / $max_iterations" - echo "Codex: $codex_model:$codex_effort" - echo "Started: $started_at" - echo "" - echo "==========================================" - echo " Recent Files" - echo "==========================================" - echo "" - - # List recent round files - local round_files - round_files=$(find "$session_dir" -maxdepth 1 -name 'round-*.md' -type f 2>/dev/null) - if [[ -n "$round_files" ]]; then - echo "$round_files" | xargs ls -lt 2>/dev/null | head -10 | while read -r line; do - echo " $line" - done - fi - - echo "" - echo "==========================================" - echo " Latest Activity" - echo "==========================================" - echo "" - - local latest_file=$(_pr_find_latest_file "$session_dir") - if [[ -n "$latest_file" && -f "$latest_file" ]]; then - echo "Latest: $(basename "$latest_file")" - echo "----------------------------------------" - tail -20 "$latest_file" - echo "" - fi - - echo "==========================================" - return 0 - fi - - # Initialize terminal - tput smcup # Save screen - tput civis # Hide cursor - clear - - # Create scrolling region below status bar - tput csr $status_bar_height $(($(tput lines) - 1)) - - # Main monitoring loop - while [[ "$monitor_running" == "true" ]]; do - # Find latest session - local 
session_dir=$(_pr_find_latest_session) - if [[ -z "$session_dir" ]]; then - tput cup $status_bar_height 0 - echo "Waiting for PR loop session..." - sleep "$check_interval" - continue - fi - - # Check if session changed - if [[ "$session_dir" != "$current_session_dir" ]]; then - current_session_dir="$session_dir" - current_file="" - [[ -n "$TAIL_PID" ]] && kill "$TAIL_PID" 2>/dev/null - TAIL_PID="" - fi - - # Find latest file to monitor - local latest_file=$(_pr_find_latest_file "$session_dir") - - # Get loop status - local state_info=$(_pr_find_state_file "$session_dir") - local loop_status="${state_info#*|}" - - # Update status bar - _pr_draw_status_bar "$session_dir" "$latest_file" "$loop_status" - - # Check if file changed or new file appeared - if [[ "$latest_file" != "$current_file" ]] && [[ -n "$latest_file" ]]; then - current_file="$latest_file" - - # Kill old tail process - [[ -n "$TAIL_PID" ]] && kill "$TAIL_PID" 2>/dev/null - - # Clear content area and show new file - tput cup $status_bar_height 0 - tput ed # Clear to end of screen - - # Start tailing the new file - tail -n +1 -f "$current_file" 2>/dev/null & - TAIL_PID=$! - fi - - # If no file to monitor yet, show waiting message - if [[ -z "$current_file" ]]; then - tput cup $status_bar_height 0 - echo "Waiting for PR loop activity..." 
- fi - - sleep "$check_interval" - done - - # Reset trap handlers (zsh and bash) - if [[ -n "${ZSH_VERSION:-}" ]]; then - # zsh: undefine the TRAP* functions - unfunction TRAPINT TRAPTERM 2>/dev/null || true - else - trap - INT TERM EXIT - fi -} - # Source skill monitor (provides _humanize_monitor_skill) if [[ -f "$HUMANIZE_SCRIPT_DIR/lib/monitor-skill.sh" ]]; then source "$HUMANIZE_SCRIPT_DIR/lib/monitor-skill.sh" diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 5409396d..671a3100 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -2,7 +2,7 @@ # # monitor-common.sh - Shared utilities for humanize monitor functions # -# This file contains common functions used by both RLCR and PR loop monitors. +# This file contains common functions used by humanize monitor functions. # It should be sourced by humanize.sh rather than executed directly. # ======================================== @@ -255,135 +255,6 @@ monitor_truncate_string() { fi } -# ======================================== -# PR Loop Phase Detection -# ======================================== - -# Detect current PR loop phase from file state -# Returns: one of: approved, cancelled, maxiter, codex_analyzing, waiting_initial_review, waiting_reviewer -# -# Usage: get_pr_loop_phase "/path/to/session" -# -# Detection strategy for codex_analyzing: -# 1. Find the latest round's pr-check.md file -# 2. Check if it's growing by comparing current size with cached previous size -# 3. Cache size in /tmp for comparison on next call -get_pr_loop_phase() { - local session_dir="$1" - - [[ ! 
-d "$session_dir" ]] && echo "unknown" && return - - # Check for final states first - [[ -f "$session_dir/approve-state.md" ]] && echo "approved" && return - [[ -f "$session_dir/cancel-state.md" ]] && echo "cancelled" && return - [[ -f "$session_dir/maxiter-state.md" ]] && echo "maxiter" && return - - # Check for Codex running by detecting file growth - # Find the highest numbered round pr-check file - local latest_check="" - local highest_round=-1 - while IFS= read -r f; do - [[ -z "$f" ]] && continue - local basename=$(basename "$f") - local round_str="${basename#round-}" - round_str="${round_str%-pr-check.md}" - if [[ "$round_str" =~ ^[0-9]+$ ]] && [[ "$round_str" -gt "$highest_round" ]]; then - highest_round="$round_str" - latest_check="$f" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-check.md' -type f 2>/dev/null) - - if [[ -n "$latest_check" ]]; then - # Get current file size - local current_size - current_size=$(stat -c%s "$latest_check" 2>/dev/null || stat -f%z "$latest_check" 2>/dev/null || echo 0) - - # Cache file for tracking size changes (unique per session) - local session_name=$(basename "$session_dir") - local cache_file="/tmp/humanize-phase-${session_name}-${highest_round}.size" - - # Read previous size from cache - local previous_size=0 - [[ -f "$cache_file" ]] && previous_size=$(cat "$cache_file" 2>/dev/null || echo 0) - - # Update cache with current size - echo "$current_size" > "$cache_file" 2>/dev/null || true - - # If file is growing OR is new (no previous record), Codex is analyzing - # Also check mtime as fallback (file modified in last 10 seconds) - local now_epoch file_epoch - now_epoch=$(date +%s) - file_epoch=$(stat -c %Y "$latest_check" 2>/dev/null || stat -f %m "$latest_check" 2>/dev/null || echo 0) - local age_seconds=$((now_epoch - file_epoch)) - - if [[ "$current_size" -gt "$previous_size" ]] || [[ "$age_seconds" -lt 10 ]]; then - echo "codex_analyzing" - return - fi - fi - - # Check state.md for round info - if 
[[ -f "$session_dir/state.md" ]]; then - local frontmatter - frontmatter=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$session_dir/state.md" 2>/dev/null) - - local current_round - local startup_case - current_round=$(echo "$frontmatter" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ') - startup_case=$(echo "$frontmatter" | grep "^startup_case:" | sed "s/startup_case: *//" | tr -d ' ') - - current_round=${current_round:-0} - startup_case=${startup_case:-1} - - if [[ "$current_round" -eq 0 && "$startup_case" -eq 1 ]]; then - echo "waiting_initial_review" - else - echo "waiting_reviewer" - fi - else - echo "unknown" - fi -} - -# Get human-readable description for PR loop phase -# Usage: get_pr_loop_phase_display "waiting_reviewer" "claude,codex" -get_pr_loop_phase_display() { - local phase="$1" - local active_bots="$2" - - case "$phase" in - approved) - echo "All reviews approved" - ;; - cancelled) - echo "Loop cancelled" - ;; - maxiter) - echo "Max iterations reached" - ;; - codex_analyzing) - echo "Codex analyzing reviews..." 
- ;; - waiting_initial_review) - if [[ -n "$active_bots" && "$active_bots" != "none" ]]; then - echo "Waiting for initial PR review from $active_bots" - else - echo "Waiting for initial PR review" - fi - ;; - waiting_reviewer) - if [[ -n "$active_bots" && "$active_bots" != "none" ]]; then - echo "Waiting for $active_bots (polling...)" - else - echo "Waiting for reviews (polling...)" - fi - ;; - *) - echo "Unknown phase" - ;; - esac -} - # ======================================== # Goal Tracker Parsing # ======================================== @@ -511,37 +382,3 @@ parse_goal_tracker() { echo "${total_acs}|${completed_acs}|${active_tasks}|${completed_tasks}|${deferred_tasks}|${open_issues}|${goal_summary}" } -# Parse PR goal-tracker.md for issue statistics -# Returns: total_issues|resolved_issues|remaining_issues|last_reviewer -# Usage: humanize_parse_pr_goal_tracker "/path/to/goal-tracker.md" -humanize_parse_pr_goal_tracker() { - local tracker_file="$1" - if [[ ! -f "$tracker_file" ]]; then - echo "0|0|0|none" - return - fi - - # Extract from Total Statistics section - # Format: - Total Issues Found: N - local total_issues - total_issues=$(grep -E "^- Total Issues Found:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - total_issues=${total_issues:-0} - - local resolved_issues - resolved_issues=$(grep -E "^- Total Issues Resolved:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - resolved_issues=${resolved_issues:-0} - - local remaining_issues - remaining_issues=$(grep -E "^- Remaining:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - remaining_issues=${remaining_issues:-0} - - # Get last reviewer from Issue Summary table (last row, Reviewer column) - # Table format: | ID | Reviewer | Round | Status | Description | - # Pattern matches rows like "|1|..." or "| 1 |..." 
(with or without spaces) - local last_reviewer - last_reviewer=$(sed -n '/## Issue Summary/,/^##/p' "$tracker_file" \ - | grep -E '^\|[[:space:]]*[0-9]+' | tail -1 | cut -d'|' -f3 | tr -d ' ') - last_reviewer=${last_reviewer:-none} - - echo "${total_issues}|${resolved_issues}|${remaining_issues}|${last_reviewer}" -} diff --git a/scripts/poll-pr-reviews.sh b/scripts/poll-pr-reviews.sh deleted file mode 100755 index 282899bd..00000000 --- a/scripts/poll-pr-reviews.sh +++ /dev/null @@ -1,328 +0,0 @@ -#!/usr/bin/env bash -# -# Poll for new PR reviews from specified bots -# -# Checks for new comments from specified bots after a given timestamp. -# -# Usage: -# poll-pr-reviews.sh --after --bots -# -# Output: JSON with new comments from the bots, or empty array if none -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -AFTER_TIMESTAMP="" -BOTS="" - -while [[ $# -gt 0 ]]; do - case $1 in - --after) - if [[ -z "${2:-}" ]]; then - echo "Error: --after requires a timestamp argument" >&2 - exit 1 - fi - AFTER_TIMESTAMP="$2" - shift 2 - ;; - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - BOTS="$2" - shift 2 - ;; - -h|--help) - cat << 'HELP_EOF' -poll-pr-reviews.sh - Poll for new PR reviews from bots - -USAGE: - poll-pr-reviews.sh --after --bots - -ARGUMENTS: - The PR number to poll - -OPTIONS: - --after Only return comments after this ISO 8601 timestamp - --bots Comma-separated list of bot names to watch - -h, --help Show this help message - -OUTPUT: - JSON object with: - - comments: Array of new comments from watched bots - - bots_responded: Array of bot names that have new comments - - has_new_comments: Boolean indicating if any new comments found - -EXAMPLE: - poll-pr-reviews.sh 123 --after 2026-01-18T12:00:00Z --bots claude,codex -HELP_EOF - exit 0 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - 
exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac -done - -# Validate arguments -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - exit 1 -fi - -if [[ -z "$AFTER_TIMESTAMP" ]]; then - echo "Error: --after timestamp is required" >&2 - exit 1 -fi - -if [[ -z "$BOTS" ]]; then - echo "Error: --bots list is required" >&2 - exit 1 -fi - -if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number: $PR_NUMBER" >&2 - exit 1 -fi - -# ======================================== -# Check Prerequisites -# ======================================== - -if ! command -v gh &>/dev/null; then - echo "Error: GitHub CLI (gh) is required" >&2 - exit 1 -fi - -if ! command -v jq &>/dev/null; then - echo "Error: jq is required for JSON parsing" >&2 - exit 1 -fi - -# ======================================== -# Get Repository Info -# ======================================== - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, then try to get PR's base repo with --repo flag - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(gh repo view --json owner,name -q '.owner.login + "/" + .name' 2>/dev/null) || { - echo "Error: Failed to get current repository" >&2 - exit 1 -} - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" -else - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(gh repo view --json parent -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != 
"null/" && "$PARENT_REPO" != "/" ]]; then - if gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Error: Failed to find PR #$PR_NUMBER in current or parent repository" >&2 - exit 1 -fi - -REPO_OWNER="${PR_BASE_REPO%%/*}" -REPO_NAME="${PR_BASE_REPO##*/}" - -if [[ -z "$REPO_OWNER" || -z "$REPO_NAME" ]]; then - echo "Error: Could not parse repository owner/name from: $PR_BASE_REPO" >&2 - exit 1 -fi - -# ======================================== -# Build Bot Filter -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# Convert comma-separated bots to jq filter pattern -BOT_PATTERNS="" -IFS=',' read -ra BOT_ARRAY <<< "$BOTS" -for bot in "${BOT_ARRAY[@]}"; do - bot=$(echo "$bot" | tr -d ' ') - author=$(map_bot_to_author "$bot") - if [[ -n "$BOT_PATTERNS" ]]; then - BOT_PATTERNS="$BOT_PATTERNS|" - fi - # Escape brackets for regex - BOT_PATTERNS="${BOT_PATTERNS}${author//\[/\\[}" - BOT_PATTERNS="${BOT_PATTERNS//\]/\\]}" -done - -# ======================================== -# Fetch and Filter Comments -# ======================================== - -# Create temporary files -TEMP_DIR=$(mktemp -d) -trap 'rm -rf "$TEMP_DIR"' EXIT - -ALL_COMMENTS_FILE="$TEMP_DIR/all_comments.json" -FILTERED_FILE="$TEMP_DIR/filtered.json" - -# Retry configuration -MAX_RETRIES=3 -RETRY_DELAY=2 - -# Track API failures (for diagnostics, not script termination) -API_FAILURES=0 - -# Function to fetch with retries -# Returns 0 even on failure to prevent script termination under set -euo pipefail -# On failure, outputs empty array "[]" so jq processing continues gracefully -fetch_with_retry() { - local endpoint="$1" - local 
attempt=1 - local result="" - - while [[ $attempt -le $MAX_RETRIES ]]; do - result=$(gh api "$endpoint" --paginate 2>/dev/null) && { - echo "$result" - return 0 - } - - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Warning: API fetch failed (attempt $attempt/$MAX_RETRIES), retrying..." >&2 - sleep "$RETRY_DELAY" - else - echo "Warning: API fetch failed after $MAX_RETRIES attempts for $endpoint" >&2 - API_FAILURES=$((API_FAILURES + 1)) - fi - ((attempt++)) - done - - # Return empty array and success (0) to allow polling to continue - # Partial API outages shouldn't terminate the entire poll loop - echo "[]" - return 0 -} - -# Initialize empty array -echo "[]" > "$ALL_COMMENTS_FILE" - -# Fetch issue comments -ISSUE_COMMENTS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/issues/$PR_NUMBER/comments") -echo "$ISSUE_COMMENTS" | jq -r --arg type "issue_comment" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - body: .body - }] - else - [] - end -' > "$TEMP_DIR/issue.json" - -# Fetch review comments -REVIEW_COMMENTS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/comments") -echo "$REVIEW_COMMENTS" | jq -r --arg type "review_comment" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - body: .body, - path: .path, - line: (.line // .original_line) - }] - else - [] - end -' > "$TEMP_DIR/review.json" - -# Fetch PR reviews -# Note: Include all reviews, even those with empty body (e.g. 
approval-only reviews) -# For empty body reviews, use a placeholder indicating the state -PR_REVIEWS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/reviews") -echo "$PR_REVIEWS" | jq -r --arg type "pr_review" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .submitted_at, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end), - state: .state - }] - else - [] - end -' > "$TEMP_DIR/reviews.json" - -# Combine all comments -jq -s 'add' "$TEMP_DIR/issue.json" "$TEMP_DIR/review.json" "$TEMP_DIR/reviews.json" > "$ALL_COMMENTS_FILE" - -# Filter: after timestamp AND from watched bots -jq --arg after "$AFTER_TIMESTAMP" --arg pattern "$BOT_PATTERNS" ' - [.[] | select( - .created_at >= $after and - (.author | test($pattern; "i")) - )] -' "$ALL_COMMENTS_FILE" > "$FILTERED_FILE" - -# ======================================== -# Build Output -# ======================================== - -COMMENT_COUNT=$(jq 'length' "$FILTERED_FILE") - -# Get list of bots that responded -BOTS_RESPONDED=$(jq -r '[.[] | .author] | unique | join(",")' "$FILTERED_FILE") - -# Build final output -jq -n \ - --argjson comments "$(cat "$FILTERED_FILE")" \ - --arg bots_responded "$BOTS_RESPONDED" \ - --argjson has_new $(if [[ "$COMMENT_COUNT" -gt 0 ]]; then echo "true"; else echo "false"; fi) \ - '{ - comments: $comments, - bots_responded: ($bots_responded | split(",") | map(select(length > 0))), - has_new_comments: $has_new, - comment_count: ($comments | length) - }' - -exit 0 diff --git a/scripts/setup-pr-loop.sh b/scripts/setup-pr-loop.sh deleted file mode 100755 index 648250da..00000000 --- a/scripts/setup-pr-loop.sh +++ /dev/null @@ -1,945 +0,0 @@ -#!/usr/bin/env bash -# -# Setup script for start-pr-loop -# -# Creates state files for the PR loop that monitors GitHub PR reviews from bots. 
-# -# Usage: -# setup-pr-loop.sh --claude|--codex [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] -# - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Override effort before sourcing loop-common.sh (PR loop defaults to medium effort). -# codex_model is NOT pre-set here so that config-backed values from loop-common.sh apply. -DEFAULT_CODEX_EFFORT="medium" -DEFAULT_CODEX_TIMEOUT=900 -DEFAULT_MAX_ITERATIONS=42 - -# Polling configuration -POLL_INTERVAL=30 -POLL_TIMEOUT=900 # 15 minutes per bot - -# Default timeout for git operations (30 seconds) -GIT_TIMEOUT=30 - -# Default timeout for GitHub CLI operations (60 seconds) -GH_TIMEOUT=60 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# Source template loader and shared loop library (provides DEFAULT_CODEX_MODEL and other constants) -HOOKS_LIB_DIR="$(cd "$SCRIPT_DIR/../hooks/lib" && pwd)" -source "$HOOKS_LIB_DIR/template-loader.sh" -source "$HOOKS_LIB_DIR/loop-common.sh" - -# Initialize template directory -TEMPLATE_DIR="${TEMPLATE_DIR:-$(get_template_dir "$HOOKS_LIB_DIR")}" - -# ======================================== -# Parse Arguments -# ======================================== - -MAX_ITERATIONS="$DEFAULT_MAX_ITERATIONS" -CODEX_MODEL="$DEFAULT_CODEX_MODEL" -CODEX_EFFORT="$DEFAULT_CODEX_EFFORT" -CODEX_TIMEOUT="$DEFAULT_CODEX_TIMEOUT" - -# Bot flags -BOT_CLAUDE="false" -BOT_CODEX="false" - -show_help() { - cat << 'HELP_EOF' -start-pr-loop - PR review loop with remote bot monitoring - -USAGE: - /humanize:start-pr-loop --claude|--codex [OPTIONS] - -BOT FLAGS (at least one required): - --claude Monitor reviews from claude[bot] (trigger: @claude) - --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger: @codex) - -OPTIONS: - --max Maximum iterations before auto-stop (default: 42) - --codex-model - Codex model 
and reasoning effort (default from config, effort: medium) - --codex-timeout - Timeout for each Codex review in seconds (default: 900) - -h, --help Show this help message - -DESCRIPTION: - Starts a PR review loop that: - - 1. Detects the PR associated with the current branch - 2. Fetches review comments from the specified bot(s) - 3. Analyzes and fixes issues identified by the bot(s) - 4. Pushes changes and triggers re-review by commenting @bot - 5. Waits for bot response (polls every 30s, 15min timeout) - 6. Uses local Codex to verify if remote concerns are valid - - The flow: - 1. Claude analyzes PR comments and fixes issues - 2. Claude pushes changes and comments @bot on PR - 3. Stop Hook polls for new bot reviews - 4. When reviews arrive, local Codex validates them - 5. If issues found, Claude continues fixing - 6. If all bots approve, loop ends - -EXAMPLES: - /humanize:start-pr-loop --claude - /humanize:start-pr-loop --codex --max 20 - /humanize:start-pr-loop --claude --codex - -STOPPING: - - /humanize:cancel-pr-loop Cancel the active PR loop - - Reach --max iterations - - All bots approve the changes - -MONITORING: - humanize monitor pr -HELP_EOF - exit 0 -} - -while [[ $# -gt 0 ]]; do - case $1 in - -h|--help) - show_help - ;; - --claude) - BOT_CLAUDE="true" - shift - ;; - --codex) - BOT_CODEX="true" - shift - ;; - --max) - if [[ -z "${2:-}" ]]; then - echo "Error: --max requires a number argument" >&2 - exit 1 - fi - if ! 
[[ "$2" =~ ^[0-9]+$ ]]; then - echo "Error: --max must be a positive integer, got: $2" >&2 - exit 1 - fi - MAX_ITERATIONS="$2" - shift 2 - ;; - --codex-model) - if [[ -z "${2:-}" ]]; then - echo "Error: --codex-model requires a MODEL:EFFORT argument" >&2 - exit 1 - fi - # Parse MODEL:EFFORT format (portable - works in bash and zsh) - if [[ "$2" == *:* ]]; then - CODEX_MODEL="${2%%:*}" - CODEX_EFFORT="${2#*:}" - else - CODEX_MODEL="$2" - CODEX_EFFORT="$DEFAULT_CODEX_EFFORT" - fi - shift 2 - ;; - --codex-timeout) - if [[ -z "${2:-}" ]]; then - echo "Error: --codex-timeout requires a number argument (seconds)" >&2 - exit 1 - fi - if ! [[ "$2" =~ ^[0-9]+$ ]]; then - echo "Error: --codex-timeout must be a positive integer (seconds), got: $2" >&2 - exit 1 - fi - CODEX_TIMEOUT="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - echo "Use --help for usage information" >&2 - exit 1 - ;; - *) - echo "Error: Unexpected argument: $1" >&2 - echo "Use --help for usage information" >&2 - exit 1 - ;; - esac -done - -# ======================================== -# Validate Bot Flags -# ======================================== - -if [[ "$BOT_CLAUDE" != "true" && "$BOT_CODEX" != "true" ]]; then - echo "Error: At least one bot flag is required" >&2 - echo "" >&2 - echo "Usage: /humanize:start-pr-loop --claude|--codex [OPTIONS]" >&2 - echo "" >&2 - echo "Bot flags:" >&2 - echo " --claude Monitor reviews from claude[bot] (trigger: @claude)" >&2 - echo " --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger: @codex)" >&2 - echo "" >&2 - echo "For help: /humanize:start-pr-loop --help" >&2 - exit 1 -fi - -# Build active_bots list (stored as array for YAML list format) -# Bot names stored in state: claude, codex -# Trigger mentions: @claude, @codex -# Comment authors: claude[bot], chatgpt-codex-connector[bot] -declare -a ACTIVE_BOTS_ARRAY=() -if [[ "$BOT_CLAUDE" == "true" ]]; then - ACTIVE_BOTS_ARRAY+=("claude") -fi -if [[ "$BOT_CODEX" == "true" ]]; then - 
ACTIVE_BOTS_ARRAY+=("codex") -fi - -# ======================================== -# Validate Prerequisites -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" - -# loop-common.sh already sourced above (provides find_active_loop, find_active_pr_loop, etc.) - -# Build dynamic mention string from active bots (using shared helper) -BOT_MENTION_STRING=$(build_bot_mention_string "${ACTIVE_BOTS_ARRAY[@]}") - -# ======================================== -# Mutual Exclusion Check -# ======================================== - -# Check for existing active loops (both RLCR and PR loops) -# Only one loop type can be active at a time -RLCR_LOOP_DIR=$(find_active_loop "$PROJECT_ROOT/.humanize/rlcr" 2>/dev/null || echo "") -PR_LOOP_DIR=$(find_active_pr_loop "$PROJECT_ROOT/.humanize/pr-loop" 2>/dev/null || echo "") - -if [[ -n "$RLCR_LOOP_DIR" ]]; then - echo "Error: An RLCR loop is already active" >&2 - echo " Active loop: $RLCR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the RLCR loop first with: /humanize:cancel-rlcr-loop" >&2 - exit 1 -fi - -if [[ -n "$PR_LOOP_DIR" ]]; then - echo "Error: A PR loop is already active" >&2 - echo " Active loop: $PR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the PR loop first with: /humanize:cancel-pr-loop" >&2 - exit 1 -fi - -# Check git repo (with timeout) -if ! run_with_timeout "$GIT_TIMEOUT" git rev-parse --git-dir &>/dev/null; then - echo "Error: Project must be a git repository (or git command timed out)" >&2 - exit 1 -fi - -# Check at least one commit (with timeout) -if ! run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD &>/dev/null 2>&1; then - echo "Error: Git repository must have at least one commit (or git command timed out)" >&2 - exit 1 -fi - -# Check gh CLI is installed -if ! 
command -v gh &>/dev/null; then - echo "Error: start-pr-loop requires the GitHub CLI (gh) to be installed" >&2 - echo "" >&2 - echo "Please install the GitHub CLI: https://cli.github.com/" >&2 - exit 1 -fi - -# Check gh CLI is authenticated -if ! gh auth status &>/dev/null 2>&1; then - echo "Error: GitHub CLI is not authenticated" >&2 - echo "" >&2 - echo "Please run: gh auth login" >&2 - exit 1 -fi - -# Check codex is available -if ! command -v codex &>/dev/null; then - echo "Error: start-pr-loop requires codex to run" >&2 - echo "" >&2 - echo "Please install Codex CLI: https://openai.com/codex" >&2 - exit 1 -fi - -# ======================================== -# Detect PR -# ======================================== - -START_BRANCH=$(run_with_timeout "$GIT_TIMEOUT" git -C "$PROJECT_ROOT" rev-parse --abbrev-ref HEAD) -if [[ -z "$START_BRANCH" ]]; then - echo "Error: Failed to get current branch (git command timed out or failed)" >&2 - exit 1 -fi - -# ======================================== -# Resolve Repository Context (for fork PR support) -# ======================================== -# IMPORTANT: For fork PRs, the PR lives in the upstream (parent) repo, not the fork. -# We must resolve the correct repo BEFORE attempting to get PR number/state. 
- -# Step 1: Get current repo -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Check if current repo is a fork and get parent repo -PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - -# Step 3: Determine which repo to use for PR lookups -# Try current repo first, then parent (for fork case) -PR_LOOKUP_REPO="" -PR_NUMBER="" - -# Try to find PR using gh's auto-detection (no --repo flag) -# This handles cases where local branch name differs from PR head (e.g., renamed branch) -# IMPORTANT: gh pr view can auto-resolve to upstream repo when in a fork, so we must -# extract the actual repo from the PR URL rather than assuming it's CURRENT_REPO -PR_INFO=$(run_with_timeout "$GH_TIMEOUT" gh pr view --json number,url -q '.number,.url' 2>/dev/null) || PR_INFO="" -if [[ -n "$PR_INFO" ]]; then - # Parse number and URL from newline-separated output (jq outputs each field on separate line) - PR_NUMBER=$(echo "$PR_INFO" | head -1) - PR_URL=$(echo "$PR_INFO" | tail -1) - # Validate PR_NUMBER is numeric - if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number from gh CLI: $PR_INFO" >&2 - PR_NUMBER="" - PR_URL="" - else - # Extract repo from URL: https://HOST/OWNER/REPO/pull/NUMBER -> OWNER/REPO - # Works with github.com and GitHub Enterprise (any host) - if [[ "$PR_URL" =~ https?://[^/]+/([^/]+/[^/]+)/pull/ ]]; then - PR_LOOKUP_REPO="${BASH_REMATCH[1]}" - else - # Fallback to current repo if URL parsing fails - PR_LOOKUP_REPO="$CURRENT_REPO" - fi - fi -fi - -# If not found in current repo and we have a parent (fork case), try parent -# IMPORTANT: For fork PRs, the head branch lives in the fork, so we must use -# the fork-qualified format (FORK_OWNER:BRANCH) when looking up in parent repo -if [[ -z "$PR_NUMBER" && -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - echo "Checking parent repo for PR (fork detected)..." >&2 - # Extract fork owner from CURRENT_REPO (format: owner/repo) - FORK_OWNER="${CURRENT_REPO%%/*}" - # Use fork-qualified branch name: FORK_OWNER:BRANCH - QUALIFIED_BRANCH="${FORK_OWNER}:${START_BRANCH}" - echo " Using qualified branch: $QUALIFIED_BRANCH" >&2 - PR_NUMBER=$(run_with_timeout "$GH_TIMEOUT" gh pr view --repo "$PARENT_REPO" "$QUALIFIED_BRANCH" --json number -q .number 2>/dev/null) || PR_NUMBER="" - if [[ -n "$PR_NUMBER" ]]; then - PR_LOOKUP_REPO="$PARENT_REPO" - echo "Found PR #$PR_NUMBER in parent repo: $PARENT_REPO" >&2 - fi -fi - -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: No pull request found for branch '$START_BRANCH'" >&2 - echo "" >&2 - echo "Please create a pull request first:" >&2 - echo " gh pr create" >&2 - exit 1 -fi - -# Validate PR_NUMBER is numeric -if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number from gh CLI: $PR_NUMBER" >&2 - exit 1 -fi - -# Get PR state (using resolved repo for fork support) -PR_STATE=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json state -q .state 2>/dev/null) || PR_STATE="" -if [[ "$PR_STATE" == "MERGED" ]]; then - echo "Error: PR #$PR_NUMBER has already been merged" >&2 - exit 1 -fi -if [[ "$PR_STATE" == "CLOSED" ]]; then - echo "Error: PR #$PR_NUMBER has been closed" >&2 - exit 1 -fi - -# IMPORTANT: Use the PR's lookup repository for API calls -# Since PR_LOOKUP_REPO was already validated to contain this PR, we can use it directly -PR_BASE_REPO="$PR_LOOKUP_REPO" - -# ======================================== -# Validate YAML Safety -# ======================================== - -# Validate branch name for YAML safety (prevents injection in state.md) -if [[ "$START_BRANCH" == *[:\#\"\'\`]* ]] || [[ "$START_BRANCH" =~ $'\n' ]]; then - echo "Error: Branch name contains YAML-unsafe characters" >&2 - echo " Branch: $START_BRANCH" >&2 - echo " Characters not allowed: : # \" ' \` newline" >&2 - echo " Please checkout a branch with a simpler name" >&2 - exit 1 -fi - -# Validate codex model for YAML safety -if [[ ! "$CODEX_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then - echo "Error: Codex model contains invalid characters" >&2 - echo " Model: $CODEX_MODEL" >&2 - echo " Only alphanumeric, hyphen, underscore, dot allowed" >&2 - exit 1 -fi - -# Validate codex effort for YAML safety -if [[ ! 
"$CODEX_EFFORT" =~ ^[a-zA-Z0-9_-]+$ ]]; then - echo "Error: Codex effort contains invalid characters" >&2 - echo " Effort: $CODEX_EFFORT" >&2 - echo " Only alphanumeric, hyphen, underscore allowed" >&2 - exit 1 -fi - -# ======================================== -# Setup State Directory -# ======================================== - -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Create timestamp for this loop session -TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) -LOOP_DIR="$LOOP_BASE_DIR/$TIMESTAMP" - -mkdir -p "$LOOP_DIR" - -# ======================================== -# Fetch Initial Comments -# ======================================== - -COMMENT_FILE="$LOOP_DIR/round-0-pr-comment.md" - -# Build comma-separated bot list for fetch script -BOTS_COMMA_LIST=$(IFS=','; echo "${ACTIVE_BOTS_ARRAY[*]}") - -# Call fetch-pr-comments.sh to get all comments, grouped by active bots -"$SCRIPT_DIR/fetch-pr-comments.sh" "$PR_NUMBER" "$COMMENT_FILE" --bots "$BOTS_COMMA_LIST" - -# ======================================== -# Determine Startup Case -# ======================================== - -# Call check-pr-reviewer-status.sh to analyze PR state -REVIEWER_STATUS=$("$SCRIPT_DIR/check-pr-reviewer-status.sh" "$PR_NUMBER" --bots "$BOTS_COMMA_LIST" 2>/dev/null) || { - echo "Warning: Failed to check reviewer status, defaulting to Case 1" >&2 - REVIEWER_STATUS='{"case":1,"reviewers_commented":[],"reviewers_missing":[],"latest_commit_sha":"","latest_commit_at":"","newest_review_at":null,"has_commits_after_reviews":false}' -} - -# Parse reviewer status JSON -STARTUP_CASE=$(echo "$REVIEWER_STATUS" | jq -r '.case') -LATEST_COMMIT_SHA=$(echo "$REVIEWER_STATUS" | jq -r '.latest_commit_sha') -LATEST_COMMIT_AT=$(echo "$REVIEWER_STATUS" | jq -r '.latest_commit_at') -HAS_COMMITS_AFTER=$(echo "$REVIEWER_STATUS" | jq -r '.has_commits_after_reviews') - -# Fallback to git HEAD if API didn't return commit SHA -if [[ -z "$LATEST_COMMIT_SHA" ]] || [[ "$LATEST_COMMIT_SHA" == "null" ]]; then - 
LATEST_COMMIT_SHA=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD) -fi - -echo "Startup Case: $STARTUP_CASE" >&2 -echo "Latest Commit: $LATEST_COMMIT_SHA" >&2 - -# Handle Case 4/5: All reviewers commented but new commits exist -# Need to trigger re-review by posting @bot comment -LAST_TRIGGER_AT="" -TRIGGER_COMMENT_ID="" - -if [[ "$STARTUP_CASE" -eq 4 ]] || [[ "$STARTUP_CASE" -eq 5 ]]; then - # First, check if there's already a pending @mention after the latest commit - # This avoids duplicate @mention spam when user has already requested re-review - echo "Case $STARTUP_CASE: Checking for existing trigger comment after latest commit..." >&2 - - # Build regex patterns for bot mentions with word boundary anchoring - # Pattern: (start|non-username-char) + @botname + (end|non-username-char) - # Prevents false matches like @claude-dev or support@codex.io - MENTION_PATTERNS_JSON=$(printf '%s\n' "${ACTIVE_BOTS_ARRAY[@]}" | jq -R '"(^|[^a-zA-Z0-9_-])@" + . + "($|[^a-zA-Z0-9_-])"' | jq -s '.') - - # Find existing trigger comment that mentions ALL active bots after latest commit - # Notes: - # - Uses PR_BASE_REPO for fork PR support - # - Uses jq -s to aggregate paginated results before filtering - # - Reuse only when ALL bots are mentioned (partial mentions need new trigger) - # - Strips code blocks/inline code/quotes since GitHub ignores mentions there - if [[ -n "$LATEST_COMMIT_AT" && "$LATEST_COMMIT_AT" != "null" ]]; then - EXISTING_TRIGGER=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate 2>/dev/null \ - | jq -s --arg since "$LATEST_COMMIT_AT" --argjson patterns "$MENTION_PATTERNS_JSON" ' - # Strip content between delimiters, keeping even-indexed parts (outside delimiters) - # Used for fenced code blocks where regex fails on nested backticks - def strip_between(delim): [splits(delim)] | to_entries | map(select(.key % 2 == 0) | .value) | join(" "); - - # Strip code blocks, inline code, and quoted lines (GitHub 
ignores mentions in these) - def strip_non_mention_contexts: - strip_between("```") # fenced code blocks - | strip_between("~~~") # tilde fenced code blocks - | gsub("`[^`]*`"; " ") # inline code - | gsub("(^|\\n)( |\\t)[^\\n]*"; " ") # indented code blocks (4+ spaces or tab) - | gsub("(^|\\n)\\s*>[^\\n]*"; " "); # quoted lines (> prefix) - - [.[][] | select(.created_at > $since and ( - # Check that ALL patterns are present in the stripped body - # Use case-insensitive matching since GitHub mentions are case-insensitive - (.body | strip_non_mention_contexts) as $clean_body - | $patterns | all(. as $p | $clean_body | test($p; "i")) - ))] - | sort_by(.created_at) - | last - | {id: .id, created_at: .created_at} - ') || EXISTING_TRIGGER="" - else - EXISTING_TRIGGER="" - fi - - # Extract fields once to avoid repeated jq calls - # Skip jq parsing if EXISTING_TRIGGER is empty (API failure fallback) - if [[ -n "$EXISTING_TRIGGER" ]]; then - TRIGGER_COMMENT_ID=$(echo "$EXISTING_TRIGGER" | jq -r '.id // empty' 2>/dev/null) || TRIGGER_COMMENT_ID="" - LAST_TRIGGER_AT=$(echo "$EXISTING_TRIGGER" | jq -r '.created_at // empty' 2>/dev/null) || LAST_TRIGGER_AT="" - else - TRIGGER_COMMENT_ID="" - LAST_TRIGGER_AT="" - fi - - if [[ -n "$TRIGGER_COMMENT_ID" ]]; then - # Found existing @mention - reuse it instead of posting new one - echo "Found existing trigger comment (ID: $TRIGGER_COMMENT_ID), skipping duplicate @mention" >&2 - else - # No existing @mention - post new trigger - echo "No existing trigger found, posting trigger comment for re-review..." 
>&2 - - # Post trigger comment (abort on failure to prevent orphaned state) - # NOTE: Uses --repo for fork PR support (comments go to base repo, not fork) - TRIGGER_BODY="$BOT_MENTION_STRING please review the latest changes (new commits since last review)" - TRIGGER_RESULT=$(run_with_timeout "$GH_TIMEOUT" gh pr comment "$PR_NUMBER" --repo "$PR_BASE_REPO" --body "$TRIGGER_BODY" 2>&1) || { - echo "Error: Failed to post trigger comment: $TRIGGER_RESULT" >&2 - echo "" >&2 - echo "Cannot proceed without a trigger comment - bots would not be notified." >&2 - echo "Please check:" >&2 - echo " - GitHub API rate limits" >&2 - echo " - Network connectivity" >&2 - echo " - Repository permissions" >&2 - rm -rf "$LOOP_DIR" - exit 1 - } - - # Get the comment ID and use GitHub's timestamp to avoid clock skew - # Fetch the latest comment from current user - CURRENT_USER=$(run_with_timeout "$GH_TIMEOUT" gh api user --jq '.login' 2>/dev/null) || CURRENT_USER="" - if [[ -n "$CURRENT_USER" ]]; then - # Fetch both ID and created_at from the comment we just posted - # IMPORTANT: --jq with --paginate runs per-page, so aggregate first then filter - # IMPORTANT: Use PR_BASE_REPO for fork PR support - COMMENT_DATA=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq ".[] | select(.user.login == \"$CURRENT_USER\") | {id: .id, created_at: .created_at}" 2>/dev/null \ - | jq -s 'sort_by(.created_at) | reverse | .[0]') || COMMENT_DATA="" - - if [[ -n "$COMMENT_DATA" && "$COMMENT_DATA" != "null" ]]; then - TRIGGER_COMMENT_ID=$(echo "$COMMENT_DATA" | jq -r '.id // empty') - # Use GitHub's timestamp instead of local time to avoid clock skew - LAST_TRIGGER_AT=$(echo "$COMMENT_DATA" | jq -r '.created_at // empty') - fi - fi - - # NOTE: Do NOT fall back to local time if GitHub timestamp fetch failed. - # Local clock skew could set a future timestamp, causing stop hook to filter - # out all comments. 
The stop hook has its own trigger detection logic that - # will find the trigger comment if LAST_TRIGGER_AT is empty. - fi - - # If --claude is specified, verify eyes reaction (MANDATORY per plan) - if [[ "$BOT_CLAUDE" == "true" ]]; then - echo "Verifying Claude eyes reaction (3 attempts x 5 seconds)..." >&2 - - if [[ -z "$TRIGGER_COMMENT_ID" ]]; then - # Fail if trigger comment ID not found (can't verify eyes without it) - echo "Error: Could not find trigger comment ID for eyes verification" >&2 - echo "" >&2 - echo "The trigger comment was posted but its ID could not be retrieved." >&2 - echo "This prevents verification of Claude's eyes reaction." >&2 - echo "" >&2 - echo "Please try:" >&2 - echo " 1. Wait a moment and try again" >&2 - echo " 2. Check GitHub rate limits" >&2 - echo " 3. Verify the comment was posted successfully" >&2 - - # Clean up the loop directory since we're failing - rm -rf "$LOOP_DIR" - exit 1 - fi - - # Check for eyes reaction with retry - # Pass --pr for fork PR support (reactions are on base repo) - if ! "$SCRIPT_DIR/check-bot-reactions.sh" claude-eyes "$TRIGGER_COMMENT_ID" --pr "$PR_NUMBER" --retry 3 --delay 5 >/dev/null 2>&1; then - echo "Error: Claude bot did not respond with eyes reaction" >&2 - echo "" >&2 - echo "This may indicate:" >&2 - echo " - Claude bot is not configured on this repository" >&2 - echo " - Network issues preventing Claude from seeing the mention" >&2 - echo "" >&2 - echo "Please verify Claude bot is set up correctly on this repository." >&2 - - # Clean up the loop directory since we're failing - rm -rf "$LOOP_DIR" - exit 1 - fi - echo "Claude eyes reaction confirmed!" 
>&2 - fi -fi - -# ======================================== -# Create State File -# ======================================== - -# Build YAML list for active_bots and configured_bots (using shared helper) -ACTIVE_BOTS_YAML=$(build_yaml_list "${ACTIVE_BOTS_ARRAY[@]}") - -# configured_bots is identical to active_bots at start, but never changes -# This allows re-polling previously approved bots if they post new issues -CONFIGURED_BOTS_YAML="$ACTIVE_BOTS_YAML" - -cat > "$LOOP_DIR/state.md" << EOF ---- -current_round: 0 -max_iterations: $MAX_ITERATIONS -pr_number: $PR_NUMBER -start_branch: $START_BRANCH -configured_bots:${CONFIGURED_BOTS_YAML} -active_bots:${ACTIVE_BOTS_YAML} -codex_model: $CODEX_MODEL -codex_effort: $CODEX_EFFORT -codex_timeout: $CODEX_TIMEOUT -poll_interval: $POLL_INTERVAL -poll_timeout: $POLL_TIMEOUT -started_at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -startup_case: $STARTUP_CASE -latest_commit_sha: $LATEST_COMMIT_SHA -latest_commit_at: ${LATEST_COMMIT_AT:-} -last_trigger_at: ${LAST_TRIGGER_AT:-} -trigger_comment_id: ${TRIGGER_COMMENT_ID:-} ---- -EOF - -# ======================================== -# Create Goal Tracker -# ======================================== - -GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" - -# Build display string for active bots -ACTIVE_BOTS_DISPLAY=$(IFS=', '; echo "${ACTIVE_BOTS_ARRAY[*]}") - -# Build acceptance criteria rows for each bot -BOT_AC_ROWS="" -AC_NUM=1 -for bot in "${ACTIVE_BOTS_ARRAY[@]}"; do - BOT_AC_ROWS="${BOT_AC_ROWS}| AC-${AC_NUM} | Get approval from ${bot} | ${bot} | pending | -" - AC_NUM=$((AC_NUM + 1)) -done - -# Current timestamp for log -STARTED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ) - -# Goal tracker template variables -GOAL_TRACKER_VARS=( - "PR_NUMBER=$PR_NUMBER" - "START_BRANCH=$START_BRANCH" - "ACTIVE_BOTS_DISPLAY=$ACTIVE_BOTS_DISPLAY" - "STARTUP_CASE=$STARTUP_CASE" - "BOT_AC_ROWS=$BOT_AC_ROWS" - "STARTED_AT=$STARTED_AT" -) - -FALLBACK_GOAL_TRACKER="# PR Loop Goal Tracker - -## PR Information - -- **PR Number:** 
#$PR_NUMBER -- **Branch:** $START_BRANCH -- **Monitored Bots:** $ACTIVE_BOTS_DISPLAY -- **Startup Case:** $STARTUP_CASE - -## Ultimate Goal - -Get all monitored bot reviewers ($ACTIVE_BOTS_DISPLAY) to approve this PR. - -## Acceptance Criteria - -| AC | Description | Bot | Status | -|----|-------------|-----|--------| -${BOT_AC_ROWS} -## Current Status - -### Round 0: Initialization - -- **Phase:** Waiting for initial bot reviews -- **Active Bots:** $ACTIVE_BOTS_DISPLAY -- **Approved Bots:** (none yet) - -### Open Issues - -| Round | Bot | Issue | Status | -|-------|-----|-------|--------| -| - | - | (awaiting initial reviews) | pending | - -### Addressed Issues - -| Round | Bot | Issue | Resolution | -|-------|-----|-------|------------| - -## Log - -| Round | Timestamp | Event | -|-------|-----------|-------| -| 0 | $STARTED_AT | PR loop initialized (Case $STARTUP_CASE) | -" - -GOAL_TRACKER_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/goal-tracker-initial.md" "$FALLBACK_GOAL_TRACKER" "${GOAL_TRACKER_VARS[@]}") -echo "$GOAL_TRACKER_CONTENT" > "$GOAL_TRACKER_FILE" - -echo "Goal tracker created: $GOAL_TRACKER_FILE" >&2 - -# ======================================== -# Create Initial Prompt -# ======================================== - -RESOLVE_PATH="$LOOP_DIR/round-0-pr-resolve.md" - -# Detect if comments exist by checking for the "No comments found" sentinel -# fetch-pr-comments.sh outputs "*No comments found.*" only when there are zero comments -if grep -q '^\*No comments found\.\*$' "$COMMENT_FILE" 2>/dev/null; then - COMMENT_COUNT=0 -else - COMMENT_COUNT=1 # Non-zero indicates comments exist -fi - -# Template variables for rendering -TEMPLATE_VARS=( - "PR_NUMBER=$PR_NUMBER" - "START_BRANCH=$START_BRANCH" - "ACTIVE_BOTS_DISPLAY=$ACTIVE_BOTS_DISPLAY" - "RESOLVE_PATH=$RESOLVE_PATH" - "BOT_MENTION_STRING=$BOT_MENTION_STRING" -) - -# Fallback header (used if template fails to load) -FALLBACK_HEADER="Read and execute below with ultrathink - -## PR Review 
Loop (Round 0) - -You are in a PR review loop monitoring feedback from remote review bots. - -**PR Information:** -- PR Number: #{{PR_NUMBER}} -- Branch: {{START_BRANCH}} -- Active Bots: {{ACTIVE_BOTS_DISPLAY}} - -## Review Comments - -The following comments have been fetched from the PR: -" - -# Load and render header template -HEADER_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-header.md" "$FALLBACK_HEADER" "${TEMPLATE_VARS[@]}") - -# Write header to prompt file -echo "$HEADER_CONTENT" > "$LOOP_DIR/round-0-prompt.md" - -# Append the fetched comments -cat "$COMMENT_FILE" >> "$LOOP_DIR/round-0-prompt.md" - -# Select task template based on whether there are comments -if [[ "$COMMENT_COUNT" -eq 0 ]]; then - # No comments yet - this is a fresh PR, bots will review automatically - FALLBACK_TASK=" ---- - -## Your Task - -This PR has no review comments yet. The monitored bots ({{ACTIVE_BOTS_DISPLAY}}) will automatically review the PR - you do NOT need to comment to trigger the first review. - -1. **Wait for automatic bot reviews**: - - Simply write your summary and try to exit - - The Stop Hook will poll for the first bot reviews - -2. **Write your initial summary** to: @{{RESOLVE_PATH}} - - Note that this is Round 0 awaiting initial bot reviews - - No issues to address yet - ---- - -## Important Rules - -1. **Do not comment to trigger review**: First reviews are automatic -2. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -3. **Trust the process**: The Stop Hook manages polling and Codex validation - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues are found, you will receive feedback and continue -4. 
If all bots approve, the loop ends -" - TASK_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-task-no-comments.md" "$FALLBACK_TASK" "${TEMPLATE_VARS[@]}") -else - # Has comments - normal flow with issues to address - FALLBACK_TASK=" ---- - -## Your Task - -1. **Analyze the comments above**, prioritizing: - - Human comments first (they take precedence) - - Bot comments (newest first) - -2. **Fix any issues** identified by the reviewers: - - Read the relevant code files - - Make necessary changes - - Create appropriate tests if needed - -3. **After fixing issues**: - - Commit your changes with a descriptive message - - Push to the remote repository - - Comment on the PR to trigger re-review: - \`\`\`bash - gh pr comment {{PR_NUMBER}} --body \"{{BOT_MENTION_STRING}} please review the latest changes\" - \`\`\` - -4. **Write your resolution summary** to: @{{RESOLVE_PATH}} - - List what issues were addressed - - Files modified - - Tests added (if any) - ---- - -## Important Rules - -1. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -2. **Always push changes**: Your fixes must be pushed for bots to review them -3. **Use the correct comment format**: Tag the bots to trigger their reviews -4. **Be thorough**: Address all valid concerns from the reviewers - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for new bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues remain, you will receive feedback and continue -4. 
If all bots approve, the loop ends -" - TASK_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-task-has-comments.md" "$FALLBACK_TASK" "${TEMPLATE_VARS[@]}") -fi - -# Append task section to prompt file -echo "$TASK_CONTENT" >> "$LOOP_DIR/round-0-prompt.md" - -# ======================================== -# Output Setup Message -# ======================================== - -# All important work is done. If output fails due to SIGPIPE (pipe closed), exit cleanly. -trap 'exit 0' PIPE - -cat << EOF -=== start-pr-loop activated === - -PR Number: #$PR_NUMBER -Branch: $START_BRANCH -Active Bots: $ACTIVE_BOTS_DISPLAY -Comments Fetched: $COMMENT_COUNT -Max Iterations: $MAX_ITERATIONS -Codex Model: $CODEX_MODEL -Codex Effort: $CODEX_EFFORT -Codex Timeout: ${CODEX_TIMEOUT}s -Poll Interval: ${POLL_INTERVAL}s -Poll Timeout: ${POLL_TIMEOUT}s (per bot) -Loop Directory: $LOOP_DIR - -The PR loop is now active. When you try to exit: -1. Stop Hook polls for new bot reviews (every 30s) -2. When reviews arrive, local Codex validates them -3. If issues remain, you'll receive feedback and continue -4. If all bots approve, the loop ends - -To cancel: /humanize:cancel-pr-loop - ---- - -EOF - -# Output the initial prompt -cat "$LOOP_DIR/round-0-prompt.md" - -# Output critical requirements based on whether there are comments -echo "" -if [[ "$COMMENT_COUNT" -eq 0 ]]; then - FALLBACK_CRITICAL=" -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should note: - - This is Round 0 awaiting initial bot reviews - - No issues to address yet - -2. Try to exit - the Stop Hook will poll for bot reviews - -DO NOT comment on the PR to trigger review - the bots will -review automatically since this is a new PR. - -The Stop Hook will poll for bot reviews. 
-===========================================" - CRITICAL_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/critical-requirements-no-comments.md" "$FALLBACK_CRITICAL" "${TEMPLATE_VARS[@]}") -else - FALLBACK_CRITICAL=" -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. COMMIT and PUSH your changes: - - Create a commit with descriptive message - - Push to the remote repository - -2. Comment on the PR to trigger re-review: - gh pr comment {{PR_NUMBER}} --body \"{{BOT_MENTION_STRING}} please review\" - -3. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should include: - - Issues addressed - - Files modified - - Tests added (if any) - -The Stop Hook will then poll for bot reviews. -===========================================" - CRITICAL_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/critical-requirements-has-comments.md" "$FALLBACK_CRITICAL" "${TEMPLATE_VARS[@]}") -fi -echo "$CRITICAL_CONTENT" - -# Explicit exit 0 to ensure clean exit code even if final output fails -exit 0 diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 47089b6f..9d45363c 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -325,7 +325,7 @@ done PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -# loop-common.sh already sourced above (provides find_active_loop, find_active_pr_loop, etc.) +# loop-common.sh already sourced above (provides find_active_loop, etc.) 
# ======================================== # Required Dependency Check @@ -362,10 +362,8 @@ fi # Mutual Exclusion Check # ======================================== -# Check for existing active loops (both RLCR and PR loops) -# Only one loop type can be active at a time +# Check for existing active RLCR loop RLCR_LOOP_DIR=$(find_active_loop "$PROJECT_ROOT/.humanize/rlcr" 2>/dev/null || echo "") -PR_LOOP_DIR=$(find_active_pr_loop "$PROJECT_ROOT/.humanize/pr-loop" 2>/dev/null || echo "") if [[ -n "$RLCR_LOOP_DIR" ]]; then echo "Error: An RLCR loop is already active" >&2 @@ -376,15 +374,6 @@ if [[ -n "$RLCR_LOOP_DIR" ]]; then exit 1 fi -if [[ -n "$PR_LOOP_DIR" ]]; then - echo "Error: A PR loop is already active" >&2 - echo " Active loop: $PR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the PR loop first with: /humanize:cancel-pr-loop" >&2 - exit 1 -fi - # ======================================== # Agent Teams Validation # ======================================== diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index 2795eccc..5b0a6af3 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -1,6 +1,6 @@ --- name: humanize -description: Iterative development with AI review. Provides RLCR (Ralph-Loop with Codex Review) for implementation planning and code review loops, plus PR review automation with bot monitoring. +description: Iterative development with AI review. Provides RLCR (Ralph-Loop with Codex Review) for implementation planning and code review loops. user-invocable: false disable-model-invocation: true --- @@ -47,19 +47,7 @@ The RLCR (Ralph-Loop with Codex Review) loop has two phases: - If no issues → loop completes with Finalize Phase - On Codex CLI `0.114.0+` with `codex_hooks` enabled, Humanize installs a native `Stop` hook so exit gating runs automatically -### 2. PR Loop - Automated PR Review Handling - -Automates handling of GitHub PR reviews from remote bots: - -1. 
Detects the PR associated with the current branch -2. Fetches review comments from specified bot(s) (`--claude` and/or `--codex`) -3. AI analyzes and fixes issues identified by the bot(s) -4. Pushes changes and triggers re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout per bot) -6. Local Codex validates if remote concerns are resolved -7. Loop continues until all bots approve or max iterations reached - -### 3. Generate Plan - Structured Plan from Draft +### 2. Generate Plan - Structured Plan from Draft Transforms a rough draft document into a structured implementation plan with: - Clear goal description @@ -106,30 +94,6 @@ After each round, write the required summary and stop/exit normally. Humanize's "{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-rlcr-loop.sh" --force ``` -### Start PR Loop - -```bash -# Monitor claude[bot] reviews -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --claude - -# Monitor chatgpt-codex-connector[bot] reviews -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --codex - -# Monitor both -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --claude --codex -``` - -**Common Options:** -- `--max N` - Maximum iterations (default: 42) -- `--codex-model MODEL:EFFORT` - Codex model for validation (default: gpt-5.4:medium) -- `--codex-timeout SECONDS` - Timeout for Codex validation (default: 900) - -### Cancel PR Loop - -```bash -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-pr-loop.sh" -``` - ### Generate Plan from Draft ```bash @@ -211,7 +175,7 @@ The RLCR loop uses a Goal Tracker to prevent goal drift: ## Prerequisites - `codex` - OpenAI Codex CLI (for review) -- `gh` - GitHub CLI (for PR loop) + ## Directory Structure @@ -231,10 +195,6 @@ Humanize stores all data in `.humanize/`: │ ├── methodology-analysis-report.md │ ├── methodology-analysis-done.md │ └── complete-state.md -├── pr-loop/ # PR loop data -│ └── / -│ ├── state.md -│ └── resolution-N.md └── skill/ # One-shot skill results └── / ├── 
input.md @@ -249,7 +209,6 @@ Use the monitor script to track loop progress: ```bash source "{{HUMANIZE_RUNTIME_ROOT}}/scripts/humanize.sh" humanize monitor rlcr # Monitor RLCR loop -humanize monitor pr # Monitor PR loop ``` ## Exit Codes diff --git a/tests/mocks/gh b/tests/mocks/gh index d91a58c9..7d4dff06 100755 --- a/tests/mocks/gh +++ b/tests/mocks/gh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Mock gh CLI for testing PR loop functionality +# Mock gh CLI for testing GitHub API interactions # # This mock intercepts gh commands and returns predefined responses # based on environment variables and fixture files. diff --git a/tests/robustness/test-concurrent-state-robustness.sh b/tests/robustness/test-concurrent-state-robustness.sh index ad72b3fc..57115bcf 100755 --- a/tests/robustness/test-concurrent-state-robustness.sh +++ b/tests/robustness/test-concurrent-state-robustness.sh @@ -386,61 +386,6 @@ else fail "Unicode content" "2" "$ROUND" fi -# ======================================== -# PR Loop State Tests -# ======================================== - -echo "" -echo "--- PR Loop State Tests ---" -echo "" - -# Test 16: find_active_pr_loop works correctly -echo "Test 16: find_active_pr_loop detection" -mkdir -p "$TEST_DIR/pr-loops/pr-loop/2026-01-19_12-00-00" -cat > "$TEST_DIR/pr-loops/pr-loop/2026-01-19_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - -ACTIVE=$(find_active_pr_loop "$TEST_DIR/pr-loops/pr-loop" 2>/dev/null || echo "") -if [[ "$ACTIVE" == *"2026-01-19"* ]]; then - pass "find_active_pr_loop works correctly" -else - fail "find_active_pr_loop" "*2026-01-19*" "$ACTIVE" -fi - -# Test 17: PR loop state with YAML list for active_bots -echo "" -echo "Test 17: PR loop state with YAML list" -mkdir -p "$TEST_DIR/pr-yaml" -cat > "$TEST_DIR/pr-yaml/state.md" << 'EOF' ---- -current_round: 1 -active_bots: - - claude - - codex -configured_bots: - - claude - - codex ---- -EOF - -# Test that we can read the state file 
without errors -if [[ -f "$TEST_DIR/pr-yaml/state.md" ]]; then - # Check if file contains expected YAML structure - if grep -q "^ - claude$" "$TEST_DIR/pr-yaml/state.md" && \ - grep -q "^ - codex$" "$TEST_DIR/pr-yaml/state.md"; then - pass "PR loop YAML list format validated" - else - fail "YAML list format" "list items" "missing" - fi -else - fail "YAML list" "file exists" "file not found" -fi - # ======================================== # Stale Loop Detection Tests # ======================================== diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 21dd9fb0..1d4a21f5 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -6,7 +6,6 @@ # - loop-edit-validator.sh # - loop-plan-file-validator.sh # - loop-codex-stop-hook.sh (state parsing) -# - pr-loop-stop-hook.sh (state parsing) # # Focus areas: # - JSON input validation edge cases @@ -658,22 +657,6 @@ else fail "Missing state handling" "exit 0, no block decision" "exit=$EXIT_CODE, output=$OUTPUT" fi -# Test 17: PR stop hook handles missing state gracefully (allows exit) -echo "" -echo "Test 17: PR stop hook allows exit when no state directory" -mkdir -p "$TEST_DIR/no-pr-state" - -set +e -OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/no-pr-state" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) -EXIT_CODE=$? -set -e -# Should exit 0, no block decision -if [[ $EXIT_CODE -eq 0 ]] && ! 
echo "$OUTPUT" | grep -q '"decision".*:.*"block"'; then - pass "PR stop hook allows exit when no state (no block decision)" -else - fail "PR missing state" "exit 0, no block decision" "exit=$EXIT_CODE" -fi - # Test 18: Stop hook with corrupted state file outputs block decision echo "" echo "Test 18: Stop hook with corrupted state outputs decision" diff --git a/tests/robustness/test-pr-loop-api-fetch.sh b/tests/robustness/test-pr-loop-api-fetch.sh deleted file mode 100755 index 489a1cee..00000000 --- a/tests/robustness/test-pr-loop-api-fetch.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop API fetch/state tests (parallel split 1/2) -# -# Runs Tests 1-11: PR Loop State Handling + fetch-pr-comments + -# Bot Response Parsing + JSON Edge Cases -# -# Sources the shared test library from test-pr-loop-api-robustness.sh -# and invokes the run_fetch_tests group function. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$SCRIPT_DIR/test-pr-loop-api-robustness.sh" - -run_fetch_tests -print_test_summary "PR Loop API Fetch Tests" -exit $? diff --git a/tests/robustness/test-pr-loop-api-poll.sh b/tests/robustness/test-pr-loop-api-poll.sh deleted file mode 100755 index f56d5594..00000000 --- a/tests/robustness/test-pr-loop-api-poll.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop API poll/stop-hook tests (parallel split 2/2) -# -# Runs Tests 12-19: PR Loop Stop Hook + poll-pr-reviews -# -# Sources the shared test library from test-pr-loop-api-robustness.sh -# and invokes the run_poll_tests group function. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$SCRIPT_DIR/test-pr-loop-api-robustness.sh" - -run_poll_tests -print_test_summary "PR Loop API Poll Tests" -exit $? 
diff --git a/tests/robustness/test-pr-loop-api-robustness.sh b/tests/robustness/test-pr-loop-api-robustness.sh deleted file mode 100755 index d8e5097a..00000000 --- a/tests/robustness/test-pr-loop-api-robustness.sh +++ /dev/null @@ -1,866 +0,0 @@ -#!/usr/bin/env bash -# -# Robustness tests for PR loop API handling -# -# Tests PR loop behavior under API error conditions by invoking actual -# PR loop scripts with mocked gh commands: -# - API failure handling -# - Rate limiting responses -# - Bot response JSON parsing -# - Network error simulation -# - PR loop state file handling -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" -source "$PROJECT_ROOT/hooks/lib/loop-common.sh" -source "$SCRIPT_DIR/../test-helpers.sh" - -setup_test_dir - -echo "========================================" -echo "PR Loop API Robustness Tests" -echo "========================================" -echo "" - -# ======================================== -# Helper Functions -# ======================================== - -# Create a comprehensive mock gh that handles repo view, pr view, and api calls -# This allows fetch-pr-comments.sh to run end-to-end -create_mock_gh() { - local dir="$1" - local behavior="$2" # "empty_array", "rate_limit", "network_error", "bot_comments", etc. - mkdir -p "$dir/bin" - - # Base mock that handles repo view and pr view for all behaviors - # Note: gh CLI applies -q jq queries internally, so we output the final result - # fetch-pr-comments.sh uses: gh repo view --json owner,name -q '...' 
- # gh pr view PR --repo REPO --json number -q .number - cat > "$dir/bin/gh" << 'GHEOF_START' -#!/usr/bin/env bash -# Mock gh command for testing - -# Check for -q flag anywhere in args (jq query) -HAS_Q_FLAG=false -for arg in "$@"; do - if [[ "$arg" == "-q" ]]; then - HAS_Q_FLAG=true - break - fi -done - -# Handle repo view (required by fetch-pr-comments.sh) -if [[ "$1" == "repo" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - # -q query extracts owner.login + "/" + name - echo "testowner/testrepo" - else - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - fi - exit 0 - elif [[ "$*" == *"parent"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - # parent query returns empty/null for non-fork - echo "/" - else - echo '{"parent":null}' - fi - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi - -# Handle pr view (required by fetch-pr-comments.sh) -# PR existence check uses: gh pr view --repo REPO --json number -q .number -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - echo "PR #123" - exit 0 -fi - -# Handle api calls based on behavior -GHEOF_START - - # Add behavior-specific api handling - case "$behavior" in - empty_array) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - rate_limit) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo '{"message":"API rate limit exceeded","documentation_url":"https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting"}' >&2 - exit 1 -fi -echo "[]" -exit 0 -GHEOF - ;; - network_error) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "Connection refused" >&2 - exit 6 -fi -echo "[]" -exit 0 -GHEOF - ;; - auth_failure) - cat >> "$dir/bin/gh" << 
'GHEOF' -if [[ "$1" == "auth" && "$2" == "status" ]]; then - echo "You are not logged into any GitHub hosts" >&2 - exit 1 -fi -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - claude_approval) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return Claude bot approval for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"claude[bot]","type":"Bot"},"body":"LGTM! The implementation looks good.","created_at":"2026-01-19T12:00:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - codex_issues) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return Codex bot with issues for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"chatgpt-codex-connector[bot]","type":"Bot"},"body":"[P1] Critical issue found\n[P2] Minor issue","created_at":"2026-01-19T12:00:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - mixed_bots) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return mixed bot responses for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"claude[bot]","type":"Bot"},"body":"LGTM","created_at":"2026-01-19T12:00:00Z"},{"id":2,"user":{"login":"chatgpt-codex-connector[bot]","type":"Bot"},"body":"Approved","created_at":"2026-01-19T12:01:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - unicode_comment) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - printf '[{"id":1,"user":{"login":"bot","type":"Bot"},"body":"Good work! 
\u2705 \u2728","created_at":"2026-01-19T12:00:00Z"}]\n' - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - long_comment) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Generate a long comment body - LONG_BODY=$(head -c 10000 /dev/zero 2>/dev/null | tr '\0' 'a' || printf 'a%.0s' {1..10000}) - echo "[{\"id\":1,\"user\":{\"login\":\"bot\",\"type\":\"Bot\"},\"body\":\"$LONG_BODY\",\"created_at\":\"2026-01-19T12:00:00Z\"}]" - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - *) - # Default: return empty array for api calls - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - esac - chmod +x "$dir/bin/gh" -} - -create_pr_loop_state() { - local dir="$1" - local round="${2:-0}" - mkdir -p "$dir/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$dir/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << EOF ---- -current_round: $round -max_iterations: 42 -pr_number: 123 -pr_owner: testowner -pr_repo: testrepo -base_branch: main -configured_bots: - - claude - - codex -active_bots: - - claude -startup_case: 3 -review_started: false ---- -EOF -} - -init_basic_git_repo() { - local dir="$1" - cd "$dir" - git init -q - git config user.email "test@test.com" - git config user.name "Test User" - git config commit.gpgsign false - git checkout -q -b main 2>/dev/null || git checkout -q main - echo "initial" > file.txt - git add file.txt - git commit -q -m "Initial commit" - cd - > /dev/null -} - -# ======================================== -# Test Group Functions -# ======================================== - -# Tests 1-11: PR Loop State Handling + fetch-pr-comments + Bot Response Parsing + JSON Edge Cases -run_fetch_tests() { - - # ======================================== - # PR Loop State Handling Tests - # ======================================== - - echo "--- PR Loop State Handling Tests ---" - echo "" - - # 
Test 1: find_active_pr_loop detects PR loop state - echo "Test 1: PR loop state detection" - mkdir -p "$TEST_DIR/prloop1/.humanize/pr-loop/2026-01-19_00-00-00" - create_pr_loop_state "$TEST_DIR/prloop1" - - ACTIVE=$(find_active_pr_loop "$TEST_DIR/prloop1/.humanize/pr-loop" 2>/dev/null || echo "") - if [[ "$ACTIVE" == *"2026-01-19"* ]]; then - pass "PR loop state detected" - else - fail "PR loop detection" "*2026-01-19*" "$ACTIVE" - fi - - # Test 2: PR loop with YAML list active_bots - echo "" - echo "Test 2: PR loop with YAML list active_bots" - mkdir -p "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 456 -active_bots: - - claude - - codex -configured_bots: - - claude - - codex -base_branch: main -review_started: false ---- -EOF - - # Verify the file can be read - if grep -q "active_bots:" "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md"; then - pass "YAML list active_bots format accepted" - else - fail "YAML list format" "contains active_bots" "not found" - fi - - # Test 3: PR loop state with missing pr_number - echo "" - echo "Test 3: PR loop state with missing pr_number" - mkdir -p "$TEST_DIR/prloop3/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$TEST_DIR/prloop3/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -configured_bots: - - claude -base_branch: main -review_started: false ---- -EOF - - # Should still be detectable as an active loop - ACTIVE=$(find_active_pr_loop "$TEST_DIR/prloop3/.humanize/pr-loop" 2>/dev/null || echo "") - if [[ -n "$ACTIVE" ]]; then - pass "PR loop without pr_number still detected" - else - fail "Missing pr_number" "detected" "not detected" - fi - - # ======================================== - # fetch-pr-comments.sh Tests - # ======================================== - - echo "" - echo "--- fetch-pr-comments.sh Script 
Tests ---" - echo "" - - # Test 4: Empty JSON array handled by fetch-pr-comments - echo "Test 4: Empty PR comments creates valid output file" - mkdir -p "$TEST_DIR/fetch1" - init_basic_git_repo "$TEST_DIR/fetch1" - create_mock_gh "$TEST_DIR/fetch1" "empty_array" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch1/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - # Must succeed AND create output file with expected content - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/fetch1/comments.md" ]]; then - # Verify output contains expected structure - if grep -q "PR Comments for #123" "$TEST_DIR/fetch1/comments.md" && \ - grep -q "testowner/testrepo" "$TEST_DIR/fetch1/comments.md"; then - pass "Empty PR comments creates valid output (PR#, repo in file)" - else - fail "Empty PR output" "contains PR# and repo" "$(head -10 "$TEST_DIR/fetch1/comments.md")" - fi - else - fail "Empty PR comments" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 5: Rate limit error produces warning in output - echo "" - echo "Test 5: Rate limit error produces warning" - mkdir -p "$TEST_DIR/fetch2" - init_basic_git_repo "$TEST_DIR/fetch2" - create_mock_gh "$TEST_DIR/fetch2" "rate_limit" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch2/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - # Script may still create output file with warnings about API failures - if [[ -f "$TEST_DIR/fetch2/comments.md" ]]; then - # Check for warning about API failures - if grep -qi "warning\|failed" "$TEST_DIR/fetch2/comments.md" || echo "$OUTPUT" | grep -qi "failed\|error"; then - pass "Rate limit produces warning (exit=$EXIT_CODE)" - else - pass "Rate limit handled gracefully (exit=$EXIT_CODE)" - fi - else - # Non-zero exit without file is acceptable for API errors - if [[ $EXIT_CODE -ne 0 ]]; then - pass "Rate limit error returns non-zero exit ($EXIT_CODE)" - else - fail "Rate limit handling" "non-zero exit or warning" "exit 0, no file" - fi - fi - - # Test 6: Network error handled gracefully - echo "" - echo "Test 6: Network error handled gracefully" - mkdir -p "$TEST_DIR/fetch3" - init_basic_git_repo "$TEST_DIR/fetch3" - create_mock_gh "$TEST_DIR/fetch3" "network_error" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch3/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch3/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - # Network errors should produce non-zero exit or warning - if [[ $EXIT_CODE -ne 0 ]] || echo "$OUTPUT" | grep -qi "error\|failed\|connection"; then - pass "Network error handled (exit=$EXIT_CODE)" - else - fail "Network error handling" "non-zero exit or error message" "exit=$EXIT_CODE" - fi - - # ======================================== - # Bot Response Parsing Tests (via fetch-pr-comments.sh) - # ======================================== - - echo "" - echo "--- Bot Response Parsing Tests ---" - echo "" - - # Test 7: Claude bot comments parsed and formatted in output - echo "Test 7: Claude bot comments appear in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot1" - init_basic_git_repo "$TEST_DIR/bot1" - create_mock_gh "$TEST_DIR/bot1" "claude_approval" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot1/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot1/comments.md" ]]; then - # Verify Claude bot comment appears in formatted output - if grep -q "claude\[bot\]" "$TEST_DIR/bot1/comments.md" && grep -q "LGTM" "$TEST_DIR/bot1/comments.md"; then - pass "Claude bot comment parsed and formatted in output" - else - fail "Claude parsing" "claude[bot] and LGTM in output" "$(cat "$TEST_DIR/bot1/comments.md")" - fi - else - fail "Claude bot test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 8: Codex bot with severity markers parsed correctly - echo "" - echo "Test 8: Codex bot severity markers in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot2" - init_basic_git_repo "$TEST_DIR/bot2" - create_mock_gh "$TEST_DIR/bot2" "codex_issues" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot2/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot2/comments.md" ]]; then - # Verify Codex severity markers appear in output - if grep -q "chatgpt-codex-connector\[bot\]" "$TEST_DIR/bot2/comments.md" && grep -q "\[P1\]" "$TEST_DIR/bot2/comments.md"; then - pass "Codex severity markers parsed in output" - else - fail "Codex parsing" "[P1] marker in output" "$(cat "$TEST_DIR/bot2/comments.md")" - fi - else - fail "Codex bot test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 9: Multiple bot responses both appear in output - echo "" - echo "Test 9: Multiple bots in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot3" - init_basic_git_repo "$TEST_DIR/bot3" - create_mock_gh "$TEST_DIR/bot3" "mixed_bots" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot3/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot3/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot3/comments.md" ]]; then - # Verify both bots appear - if grep -q "claude\[bot\]" "$TEST_DIR/bot3/comments.md" && grep -q "chatgpt-codex-connector\[bot\]" "$TEST_DIR/bot3/comments.md"; then - pass "Multiple bot responses both appear in output" - else - fail "Multiple bots" "both bots in output" "$(cat "$TEST_DIR/bot3/comments.md")" - fi - else - fail "Multiple bots test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # ======================================== - # JSON Edge Cases (via fetch-pr-comments.sh) - # ======================================== - - echo "" - echo "--- JSON Edge Cases ---" - echo "" - - # Test 10: Unicode in bot comments processed through full pipeline - echo "Test 10: Unicode comments processed by fetch-pr-comments" - mkdir -p "$TEST_DIR/json1" - init_basic_git_repo "$TEST_DIR/json1" - create_mock_gh "$TEST_DIR/json1" "unicode_comment" - - set +e - OUTPUT=$(PATH="$TEST_DIR/json1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/json1/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/json1/comments.md" ]]; then - pass "Unicode comments processed successfully" - else - fail "Unicode handling" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 11: Very long comment body processed - echo "" - echo "Test 11: Long comment body processed by fetch-pr-comments" - mkdir -p "$TEST_DIR/json2" - init_basic_git_repo "$TEST_DIR/json2" - create_mock_gh "$TEST_DIR/json2" "long_comment" - - set +e - OUTPUT=$(PATH="$TEST_DIR/json2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/json2/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/json2/comments.md" ]]; then - # Verify the long content was written - FILE_SIZE=$(wc -c < "$TEST_DIR/json2/comments.md") - if [[ $FILE_SIZE -gt 1000 ]]; then - pass "Long comment body processed (file size: $FILE_SIZE bytes)" - else - pass "Long comment handled (may be truncated)" - fi - else - fail "Long body handling" "exit 0 with output file" "exit=$EXIT_CODE" - fi -} - -# Tests 12-19: PR Loop Stop Hook + poll-pr-reviews -run_poll_tests() { - - # ======================================== - # PR Loop Stop Hook Tests - # ======================================== - - echo "" - echo "--- PR Loop Stop Hook Tests ---" - echo "" - - # Test 12: Stop hook with no active PR loop - echo "Test 12: Stop hook with no active PR loop" - mkdir -p "$TEST_DIR/stop1" - init_basic_git_repo "$TEST_DIR/stop1" - - set +e - OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/stop1" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]]; then - pass "PR stop hook passes when no loop active" - else - fail "No PR loop handling" "exit 0" "exit $EXIT_CODE" - fi - - # Test 13: Stop hook with corrupted state - echo "" - echo "Test 13: Stop hook with corrupted state" - mkdir -p "$TEST_DIR/stop2/.humanize/pr-loop/2026-01-19_00-00-00" - echo "not valid yaml [[[" > "$TEST_DIR/stop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md" - init_basic_git_repo "$TEST_DIR/stop2" - - set +e - OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/stop2" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) - EXIT_CODE=$? 
- set -e - - # Should handle gracefully without crashing - if [[ $EXIT_CODE -lt 128 ]]; then - pass "Stop hook handles corrupted state (exit $EXIT_CODE)" - else - fail "Corrupted state" "exit < 128" "exit $EXIT_CODE" - fi - - # Test 14: approve-state.md directory structure - echo "" - echo "Test 14: approve-state.md directory structure" - mkdir -p "$TEST_DIR/stop3/.humanize/pr-loop/2026-01-19_00-00-00" - create_pr_loop_state "$TEST_DIR/stop3" - - # The approve-state.md path should be writable - APPROVE_PATH="$TEST_DIR/stop3/.humanize/pr-loop/2026-01-19_00-00-00/approve-state.md" - touch "$APPROVE_PATH" 2>/dev/null - if [[ -f "$APPROVE_PATH" ]]; then - pass "approve-state.md path is writable" - rm "$APPROVE_PATH" - else - fail "Approve path" "writable" "not writable" - fi - - # ======================================== - # poll-pr-reviews.sh Tests - # ======================================== - - echo "" - echo "--- poll-pr-reviews.sh Script Tests ---" - echo "" - - # Test 15: poll-pr-reviews help displays usage - echo "Test 15: poll-pr-reviews help displays usage" - set +e - OUTPUT=$("$PROJECT_ROOT/scripts/poll-pr-reviews.sh" --help 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -qi "usage\|poll"; then - pass "poll-pr-reviews help displays usage" - else - fail "poll-pr-reviews help" "exit 0 with usage" "exit=$EXIT_CODE" - fi - - # Test 16: poll-pr-reviews with missing required args - echo "" - echo "Test 16: poll-pr-reviews missing args rejected" - set +e - OUTPUT=$("$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "required\|error"; then - pass "poll-pr-reviews missing args rejected" - else - fail "poll-pr-reviews validation" "non-zero with error" "exit=$EXIT_CODE" - fi - - # Test 17: poll-pr-reviews with mocked gh returns JSON output with required fields - echo "" - echo "Test 17: poll-pr-reviews with mocked gh produces valid JSON output" - mkdir -p "$TEST_DIR/poll1" - init_basic_git_repo "$TEST_DIR/poll1" - create_mock_gh "$TEST_DIR/poll1" "claude_approval" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll1/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # poll-pr-reviews must output JSON with has_new_comments and parse correctly - if [[ $EXIT_CODE -eq 0 ]]; then - # Parse JSON to verify structure - HAS_NEW=$(echo "$OUTPUT" | jq -r '.has_new_comments // empty' 2>/dev/null || echo "") - if [[ -n "$HAS_NEW" ]] && [[ "$HAS_NEW" == "true" || "$HAS_NEW" == "false" ]]; then - # Also verify comments array exists (may be empty) - COMMENTS_TYPE=$(echo "$OUTPUT" | jq -r '.comments | type' 2>/dev/null || echo "") - if [[ "$COMMENTS_TYPE" == "array" ]]; then - pass "poll-pr-reviews produces valid JSON (has_new_comments=$HAS_NEW, comments is array)" - else - pass "poll-pr-reviews produces JSON with has_new_comments=$HAS_NEW" - fi - else - fail "poll-pr-reviews JSON" "has_new_comments boolean" "output missing or invalid: $OUTPUT" - fi - else - fail "poll-pr-reviews execution" "exit 0" "exit=$EXIT_CODE, output=$OUTPUT" - fi - - # Test 18: poll-pr-reviews timeout handling with slow mock - echo "" - echo "Test 18: poll-pr-reviews handles slow API gracefully" - mkdir -p "$TEST_DIR/poll2" - init_basic_git_repo "$TEST_DIR/poll2" - - # Create a mock gh that sleeps briefly but responds - mkdir -p "$TEST_DIR/poll2/bin" - cat > "$TEST_DIR/poll2/bin/gh" << 'GHEOF' -#!/usr/bin/env bash -# Handle repo view -if [[ "$1" == "repo" && "$2" == "view" ]]; 
then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - exit 0 - elif [[ "$*" == *"parent"* ]]; then - echo '{"parent":null}' - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi -# Handle pr view -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - exit 0 -fi -# Simulate slow API -if [[ "$1" == "api" ]]; then - sleep 0.5 - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - chmod +x "$TEST_DIR/poll2/bin/gh" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll2/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # Should complete without hanging and produce valid JSON (even if empty) - if [[ $EXIT_CODE -eq 0 ]]; then - # Verify JSON output with has_new_comments (API returns empty, so should be false) - HAS_NEW=$(echo "$OUTPUT" | jq -r '.has_new_comments // empty' 2>/dev/null || echo "") - if [[ "$HAS_NEW" == "false" ]]; then - pass "poll-pr-reviews handles slow API (has_new_comments=false, no comments)" - elif [[ -n "$HAS_NEW" ]]; then - pass "poll-pr-reviews handles slow API (has_new_comments=$HAS_NEW)" - else - pass "poll-pr-reviews handles slow API gracefully (exit=0)" - fi - else - fail "poll-pr-reviews timeout" "exit 0" "exit=$EXIT_CODE" - fi - - # Test 19: poll-pr-reviews with API failure returns has_new_comments:false - echo "" - echo "Test 19: poll-pr-reviews with API failure returns has_new_comments:false" - mkdir -p "$TEST_DIR/poll3" - init_basic_git_repo "$TEST_DIR/poll3" - - # Create a mock gh that fails on API calls - mkdir -p "$TEST_DIR/poll3/bin" - cat > "$TEST_DIR/poll3/bin/gh" << 'GHEOF' -#!/usr/bin/env bash -# Check for -q flag anywhere in args (jq query) -HAS_Q_FLAG=false -for arg in "$@"; do - if [[ "$arg" == "-q" ]]; then - 
HAS_Q_FLAG=true - break - fi -done - -# Handle repo view -if [[ "$1" == "repo" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - echo "testowner/testrepo" - else - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - fi - exit 0 - elif [[ "$*" == *"parent"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - echo "/" - else - echo '{"parent":null}' - fi - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi -# Handle pr view -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - exit 0 -fi -# Fail on API calls to simulate network error -if [[ "$1" == "api" ]]; then - echo "Error: Network unreachable" >&2 - exit 1 -fi -exit 0 -GHEOF - chmod +x "$TEST_DIR/poll3/bin/gh" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll3/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # On API failure, poll-pr-reviews MUST: - # 1. Exit with code 0 - # 2. Output valid JSON (parseable by jq -e) - # 3. Have has_new_comments exactly equal to false - # NO FALLBACKS - all three conditions must be met - if [[ $EXIT_CODE -ne 0 ]]; then - fail "poll-pr-reviews API failure" "exit 0" "exit=$EXIT_CODE" - else - # Extract JSON from output (warnings precede JSON, JSON may be multi-line) - # Find the line number where JSON starts (first '{') and extract from there to end - JSON_START_LINE=$(echo "$OUTPUT" | grep -n '^{' | head -1 | cut -d: -f1) - if [[ -z "$JSON_START_LINE" ]]; then - fail "poll-pr-reviews API failure" "JSON output" "no JSON found in output" - else - JSON_OUTPUT=$(echo "$OUTPUT" | tail -n +$JSON_START_LINE) - - # Validate JSON is parseable using jq -e (exits non-zero on invalid JSON) - if ! echo "$JSON_OUTPUT" | jq -e '.' 
>/dev/null 2>&1; then - fail "poll-pr-reviews API failure" "valid JSON output" "invalid JSON: $JSON_OUTPUT" - else - # Verify has_new_comments is exactly boolean false (not string "false") - # jq -e '.has_new_comments == false' returns 0 only if the value is boolean false - if echo "$JSON_OUTPUT" | jq -e '.has_new_comments == false' >/dev/null 2>&1; then - pass "poll-pr-reviews returns exit 0 with valid JSON and has_new_comments:false (boolean)" - else - # Show actual value and type for debugging - HAS_NEW_VALUE=$(echo "$JSON_OUTPUT" | jq '.has_new_comments') - HAS_NEW_TYPE=$(echo "$JSON_OUTPUT" | jq -r '.has_new_comments | type') - fail "poll-pr-reviews API failure" "has_new_comments: boolean false" "value=$HAS_NEW_VALUE type=$HAS_NEW_TYPE" - fi - fi - fi - fi -} - -# ======================================== -# Source Guard: run all tests when executed directly -# ======================================== - -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - run_fetch_tests - run_poll_tests - print_test_summary "PR Loop API Robustness Test Summary" - exit $? -fi diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index ec2d293c..4e13e5b8 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -2,7 +2,7 @@ # # Robustness tests for setup scripts # -# Tests setup-rlcr-loop.sh and setup-pr-loop.sh under edge cases: +# Tests setup-rlcr-loop.sh under edge cases: # - Argument parsing edge cases # - Plan file validation edge cases # - Git repository edge cases @@ -92,17 +92,6 @@ run_rlcr_setup() { ) } -# Run setup-pr-loop.sh with proper isolation from real PR loop -# Usage: run_pr_setup [args...] 
-run_pr_setup() { - local repo_dir="$1" - shift - ( - cd "$repo_dir" - CLAUDE_PROJECT_DIR="$repo_dir" "$PROJECT_ROOT/scripts/setup-pr-loop.sh" "$@" - ) -} - # ======================================== # Setup RLCR Loop Argument Parsing Tests # ======================================== @@ -502,68 +491,6 @@ else fail "Tracked plan without flag" "rejection" "exit=$EXIT_CODE" fi -# ======================================== -# Setup PR Loop Tests -# ======================================== - -echo "" -echo "--- Setup PR Loop Argument Tests ---" -echo "" - -# Test 19: Help flag displays usage -echo "Test 19: PR loop help flag displays usage" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --help 2>&1) || true -if echo "$OUTPUT" | grep -q "USAGE\|start-pr-loop"; then - pass "PR loop help flag displays usage" -else - fail "PR loop help" "USAGE text" "no usage found" -fi - -# Test 20: Missing bot flag shows error -echo "" -echo "Test 20: PR loop missing bot flag shows error" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "at least one bot flag"; then - pass "PR loop missing bot flag shows error" -else - fail "Missing bot flag" "error message" "exit=$EXIT_CODE" -fi - -# Test 21: Unknown option rejected -echo "" -echo "Test 21: PR loop unknown option rejected" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --unknown-option 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "unknown option"; then - pass "PR loop unknown option rejected" -else - fail "PR loop unknown option" "rejection" "exit=$EXIT_CODE" -fi - -# Test 22: --max with non-numeric value rejected -echo "" -echo "Test 22: PR loop --max with non-numeric value rejected" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --claude --max abc 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "positive integer"; then - pass "PR loop --max non-numeric rejected" -else - fail "PR loop --max validation" "rejection" "exit=$EXIT_CODE" -fi - -# Test 23: Non-git directory rejected -echo "" -echo "Test 23: PR loop non-git directory rejected" -mkdir -p "$TEST_DIR/pr-nongit" -OUTPUT=$(run_pr_setup "$TEST_DIR/pr-nongit" --claude 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "git repository"; then - pass "PR loop non-git directory rejected" -else - fail "PR loop non-git" "rejection" "exit=$EXIT_CODE" -fi - # ======================================== # Mutual Exclusion Tests # ======================================== @@ -602,38 +529,6 @@ else fail "RLCR mutual exclusion" "rejection" "exit=$EXIT_CODE" fi -# Test 25: PR loop blocks starting RLCR loop -echo "" -echo "Test 25: Active PR loop blocks new RLCR loop" -mkdir -p "$TEST_DIR/repo25" -init_basic_git_repo "$TEST_DIR/repo25" -create_minimal_plan "$TEST_DIR/repo25" -echo "plan.md" >> "$TEST_DIR/repo25/.gitignore" -git -C "$TEST_DIR/repo25" add .gitignore && git -C "$TEST_DIR/repo25" commit -q -m "Add gitignore" - -# Create fake active PR loop -mkdir -p "$TEST_DIR/repo25/.humanize/pr-loop/2026-01-19_00-00-00" -cat > "$TEST_DIR/repo25/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - -mkdir -p "$TEST_DIR/repo25/bin" -echo '#!/usr/bin/env bash -exit 0' > "$TEST_DIR/repo25/bin/codex" -chmod +x "$TEST_DIR/repo25/bin/codex" - -OUTPUT=$(PATH="$TEST_DIR/repo25/bin:$PATH" run_rlcr_setup "$TEST_DIR/repo25" plan.md 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "pr loop.*already active\|already active"; then - pass "Active PR loop blocks new RLCR loop" -else - fail "PR loop blocks RLCR" "rejection" "exit=$EXIT_CODE" -fi - # ======================================== # Symlink Protection Tests # ======================================== @@ -751,33 +646,6 @@ else pass "Valid numeric arguments accepted (--max 10, --codex-timeout 3600)" fi -# Test 30: Valid PR loop setup proceeds past argument validation -echo "" -echo "Test 30: Valid PR loop setup proceeds past argument validation" -mkdir -p "$TEST_DIR/repo30" -init_basic_git_repo "$TEST_DIR/repo30" - -# Create mock gh that fails auth check (to test dependency handling) -mkdir -p "$TEST_DIR/repo30/bin" -cat > "$TEST_DIR/repo30/bin/gh" << 'EOF' -#!/usr/bin/env bash -if [[ "$1" == "auth" && "$2" == "status" ]]; then - echo "Not logged in" >&2 - exit 1 -fi -exit 0 -EOF -chmod +x "$TEST_DIR/repo30/bin/gh" - -OUTPUT=$(PATH="$TEST_DIR/repo30/bin:$PATH" run_pr_setup "$TEST_DIR/repo30" --claude 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -# Should fail at gh auth check, not argument parsing -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "gh\|auth\|logged"; then - pass "Valid PR loop setup proceeds to gh auth check" -else - fail "Valid PR loop setup" "fail at gh auth check" "exit=$EXIT_CODE" -fi - # ======================================== # Timeout Scenario Tests # ======================================== @@ -804,20 +672,6 @@ else pass "--codex-timeout 0 accepted (non-negative integer validation)" fi -# Test 32: --codex-timeout with non-numeric value rejected (PR loop) -echo "" -echo "Test 32: PR loop --codex-timeout with non-numeric value rejected" -mkdir -p "$TEST_DIR/repo32" -init_basic_git_repo "$TEST_DIR/repo32" -mkdir -p "$TEST_DIR/repo32/bin" -OUTPUT=$(PATH="$TEST_DIR/repo32/bin:$PATH" run_pr_setup "$TEST_DIR/repo32" --claude --codex-timeout "abc" 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "positive integer"; then - pass "PR loop --codex-timeout non-numeric rejected" -else - fail "PR loop --codex-timeout non-numeric" "rejection with 'positive integer'" "exit=$EXIT_CODE, output=$OUTPUT" -fi - # Test 33: Very large timeout value accepted echo "" echo "Test 33: Very large timeout value accepted" diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index b5322ae2..b6ba6b24 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -86,10 +86,6 @@ TEST_SUITES=( "test-config-error-handling.sh" "test-codex-hook-install.sh" "test-unified-codex-config.sh" - "test-pr-loop-1-scripts.sh" - "test-pr-loop-2-hooks.sh" - "test-pr-loop-3-stophook.sh" - "test-pr-loop-system.sh" # Session ID and Agent Teams tests "test-session-id.sh" "test-agent-teams.sh" @@ -118,8 +114,6 @@ TEST_SUITES=( "robustness/test-hook-system-robustness.sh" "robustness/test-template-error-robustness.sh" "robustness/test-state-transition-robustness.sh" - "robustness/test-pr-loop-api-fetch.sh" - "robustness/test-pr-loop-api-poll.sh" ) # Tests that must be run with zsh (not bash) diff --git a/tests/setup-fixture-mock-gh.sh b/tests/setup-fixture-mock-gh.sh deleted file mode 100755 index f522bf58..00000000 --- a/tests/setup-fixture-mock-gh.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# -# Create a mock gh CLI that returns fixture data for testing -# fetch-pr-comments.sh and poll-pr-reviews.sh -# -# Usage: ./setup-fixture-mock-gh.sh -# -# The mock gh will: -# - Return fixture data for /issues/*/comments, /pulls/*/comments, /pulls/*/reviews -# - Return testuser for gh api user -# - Return testowner/testrepo for gh repo view -# - -set -euo pipefail - -MOCK_BIN_DIR="${1:-}" -FIXTURES_DIR="${2:-}" - -if [[ -z "$MOCK_BIN_DIR" || -z "$FIXTURES_DIR" ]]; then - echo "Usage: $0 " >&2 - exit 1 -fi - -mkdir -p "$MOCK_BIN_DIR" - -# Create mock gh that returns fixtures -cat > "$MOCK_BIN_DIR/gh" << 
MOCK_GH_EOF -#!/usr/bin/env bash -# Fixture-backed mock gh CLI for testing fetch/poll scripts - -FIXTURES_DIR="$FIXTURES_DIR" - -case "\$1" in - auth) - if [[ "\$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"owner,name"* ]] || [[ "\$*" == *"owner"* && "\$*" == *"name"* ]]; then - echo '{"owner": {"login": "testowner"}, "name": "testrepo"}' - elif [[ "\$*" == *"parent"* ]]; then - echo '{"parent": null}' - elif [[ "\$*" == *"owner"* ]]; then - echo '{"owner": {"login": "testowner"}}' - elif [[ "\$*" == *"name"* ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"number"* ]]; then - echo '{"number": 123}' - elif [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - fi - exit 0 - fi - ;; - api) - # Handle user endpoint - if [[ "\$2" == "user" ]]; then - echo '{"login": "testuser"}' - exit 0 - fi - - # Handle issue comments endpoint - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - cat "\$FIXTURES_DIR/issue_comments.json" - exit 0 - fi - - # Handle PR review comments endpoint (inline comments) - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - cat "\$FIXTURES_DIR/review_comments.json" - exit 0 - fi - - # Handle PR reviews endpoint - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - cat "\$FIXTURES_DIR/pr_reviews.json" - exit 0 - fi - - # Default: return empty array - echo "[]" - exit 0 - ;; -esac - -echo "Mock gh: unhandled command: \$*" >&2 -exit 1 -MOCK_GH_EOF - -chmod +x "$MOCK_BIN_DIR/gh" - -echo "$MOCK_BIN_DIR" diff --git a/tests/setup-monitor-test-env.sh b/tests/setup-monitor-test-env.sh index 92fa9431..977d1b53 100755 --- a/tests/setup-monitor-test-env.sh +++ b/tests/setup-monitor-test-env.sh @@ -2,7 +2,7 @@ # # Helper script to set up monitor test environment # This script creates the necessary directory structure and state files -# for testing the monitor pr command. 
+# for testing the monitor command. # # Usage: ./setup-monitor-test-env.sh # @@ -18,72 +18,9 @@ if [[ -z "$TEST_DIR" ]]; then fi case "$TEST_NAME" in - yaml_list) - # Test: active_bots with YAML list format - TIMESTAMP="2026-01-18_16-00-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 456 -start_branch: feature-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; - configured) - # Test: configured_bots vs active_bots (partial approval) - TIMESTAMP="2026-01-18_16-01-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 2 -max_iterations: 42 -pr_number: 789 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; - empty) - # Test: empty active_bots (all approved) - TIMESTAMP="2026-01-18_16-02-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 3 -max_iterations: 42 -pr_number: 999 -start_branch: approved-branch -configured_bots: - - claude - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; *) echo "Unknown test name: $TEST_NAME" >&2 - echo "Available: yaml_list, configured, empty" >&2 + echo "Available: (none currently)" >&2 exit 1 ;; esac diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh index b25c845e..2d70bb2d 100755 --- a/tests/test-codex-hook-install.sh +++ b/tests/test-codex-hook-install.sh @@ -90,11 +90,6 @@ cat > "$HOOKS_FILE" <<'EOF' "type": "command", "command": 
"/tmp/old/skills/humanize/hooks/loop-codex-stop-hook.sh", "timeout": 30 - }, - { - "type": "command", - "command": "/tmp/old/skills/humanize/hooks/pr-loop-stop-hook.sh", - "timeout": 30 } ] }, @@ -190,7 +185,6 @@ for group in data["hooks"]["Stop"]: expected = { f"{runtime_root}/hooks/loop-codex-stop-hook.sh", - f"{runtime_root}/hooks/pr-loop-stop-hook.sh", } print("FOUND=" + ("1" if expected.issubset(set(commands)) else "0")) @@ -225,10 +219,10 @@ else fail "Codex install preserves SessionStart hooks" "SESSION=1" "$PY_OUTPUT" fi -if grep -q '^COUNT=2$' <<<"$PY_OUTPUT"; then - pass "Codex install writes exactly two managed Humanize Stop hooks" +if grep -q '^COUNT=1$' <<<"$PY_OUTPUT"; then + pass "Codex install writes exactly one managed Humanize Stop hook" else - fail "Codex install writes exactly two managed Humanize Stop hooks" "COUNT=2" "$PY_OUTPUT" + fail "Codex install writes exactly one managed Humanize Stop hook" "COUNT=1" "$PY_OUTPUT" fi mkdir -p "$TEST_DIR/project" @@ -281,10 +275,10 @@ print(sum(1 for cmd in commands if "/humanize/hooks/" in cmd)) PY )" -if [[ "$PY_OUTPUT_2" == "2" ]]; then +if [[ "$PY_OUTPUT_2" == "1" ]]; then pass "Codex install is idempotent for managed hook commands" else - fail "Codex install is idempotent for managed hook commands" "2" "$PY_OUTPUT_2" + fail "Codex install is idempotent for managed hook commands" "1" "$PY_OUTPUT_2" fi if [[ "$(wc -l < "$FEATURE_LOG" | tr -d ' ')" == "2" ]]; then diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh index 37212881..3d70c870 100644 --- a/tests/test-disable-nested-codex-hooks.sh +++ b/tests/test-disable-nested-codex-hooks.sh @@ -38,7 +38,6 @@ export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" -PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" setup_repo() { local repo_dir="$1" @@ -194,13 +193,6 @@ else "--disable codex_hooks review" "$(cat "$TEST_DIR/review.args" 
2>/dev/null || echo missing)" fi -if grep -q 'codex "\${CODEX_DISABLE_HOOKS_ARGS\[@\]}" exec' "$PR_STOP_HOOK"; then - pass "PR stop hook disables codex_hooks for nested codex exec" -else - fail "PR stop hook disables codex_hooks for nested codex exec" \ - 'codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec' "not found" -fi - echo "" echo "========================================" echo "Disable Nested Codex Hooks Tests" diff --git a/tests/test-monitor-e2e-deletion.sh b/tests/test-monitor-e2e-deletion.sh index afd738e6..6197ba7e 100755 --- a/tests/test-monitor-e2e-deletion.sh +++ b/tests/test-monitor-e2e-deletion.sh @@ -11,7 +11,6 @@ echo "" monitor_test_bash_deletion monitor_test_zsh_deletion -monitor_test_pr_deletion echo "" echo "========================================" diff --git a/tests/test-monitor-e2e-real.sh b/tests/test-monitor-e2e-real.sh index aa9eba1d..8a1c4f0e 100755 --- a/tests/test-monitor-e2e-real.sh +++ b/tests/test-monitor-e2e-real.sh @@ -685,317 +685,6 @@ ZSH_SIGINT_SCRIPT fi } -# ======================================== -# Test 5: Real _humanize_monitor_pr with directory deletion -# ======================================== -monitor_test_pr_deletion() { - echo "" - echo "Test 5: Real _humanize_monitor_pr with directory deletion" - echo "" - - # Create test project directory for PR monitor - TEST_PROJECT_PR="$TEST_BASE/project_pr" - mkdir -p "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create valid PR loop state.md file - cat > "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'STATE' -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -STATE - - # Create goal-tracker.md for PR loop - cat > "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << 'GOALTRACKER_EOF' -# PR 
Review Goal Tracker - -## PR Information -- PR Number: #123 -- Branch: test-branch -- Started: 2026-01-18T10:00:00Z - -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | - -## Total Statistics -- Total Issues Found: 0 -- Remaining: 0 -GOALTRACKER_EOF - - # Create fake HOME for PR monitor test - FAKE_HOME_PR="$TEST_BASE/home_pr" - mkdir -p "$FAKE_HOME_PR" - - # Create cache directory for PR monitor - SANITIZED_PROJECT_PR=$(echo "$TEST_PROJECT_PR" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') - CACHE_DIR_PR="$FAKE_HOME_PR/.cache/humanize/$SANITIZED_PROJECT_PR/2026-01-18_12-00-00" - mkdir -p "$CACHE_DIR_PR" - echo "PR round 1 started" > "$CACHE_DIR_PR/round-1-codex-run.log" - - # Create bash test runner script for PR monitor - cat > "$TEST_PROJECT_PR/run_real_monitor_pr.sh" << 'MONITOR_SCRIPT' -#!/usr/bin/env bash -# Run the REAL _humanize_monitor_pr function - -PROJECT_DIR="$1" -PROJECT_ROOT="$2" -FAKE_HOME="$3" - -cd "$PROJECT_DIR" - -# Override HOME and XDG_CACHE_HOME -export HOME="$FAKE_HOME" -export XDG_CACHE_HOME="$FAKE_HOME/.cache" - -# Create shim functions for terminal commands -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} - -# Stub terminal control -printf() { - case "$1" in - *\\033*) : ;; # Ignore escape sequences - *) builtin printf "$@" ;; - esac -} - -# Source the humanize script (loads all functions) -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Override _pr_cleanup for testing -_pr_cleanup() { - echo "CLEANUP_CALLED_PR" -} - -# Start monitor with --once flag (single iteration) -# Then delete directory after brief delay -( - sleep 0.5 - rm -rf "$PROJECT_DIR/.humanize/pr-loop/2026-01-18_12-00-00" -) & -cleanup_pid=$! - -# Run monitor in foreground (will detect deletion) -humanize monitor pr --once 2>&1 - -echo "EXIT_CODE:$?" 
- -# Cleanup background process -kill $cleanup_pid 2>/dev/null || true -wait $cleanup_pid 2>/dev/null || true -MONITOR_SCRIPT - - chmod +x "$TEST_PROJECT_PR/run_real_monitor_pr.sh" - - # Run the PR monitor test - output_pr=$("$TEST_PROJECT_PR/run_real_monitor_pr.sh" "$TEST_PROJECT_PR" "$PROJECT_ROOT" "$FAKE_HOME_PR" 2>&1) || true - - # Verify: PR monitor e2e - graceful exit - if echo "$output_pr" | grep -qE 'Stopped|gracefully|EXIT_CODE:0'; then - pass "PR monitor e2e - graceful exit on directory deletion" - else - # Alternative: check for any clean exit indication - if echo "$output_pr" | grep -q "EXIT_CODE:0"; then - pass "PR monitor e2e - clean exit" - else - fail "PR monitor e2e" "Expected graceful stop or EXIT_CODE:0, got: $output_pr" - fi - fi - - # Verify no glob errors in PR monitor output - if echo "$output_pr" | grep -qE 'no matches found|bad pattern'; then - fail "PR monitor glob errors" "Found glob errors: $(echo "$output_pr" | grep -E 'no matches found|bad pattern')" - else - pass "PR monitor no glob errors" - fi -} - -# ======================================== -# Test 6: Real _humanize_monitor_pr without --once with SIGINT -# ======================================== -monitor_test_pr_sigint() { - echo "" - echo "Test 6: Real _humanize_monitor_pr without --once with SIGINT" - echo "" - - # Create test project directory for PR monitor without --once - TEST_PROJECT_PR_NO_ONCE="$TEST_BASE/project_pr_no_once" - mkdir -p "$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00" - - # Create valid PR loop state.md file - cat > "$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00/state.md" << 'STATE' -current_round: 1 -max_iterations: 42 -pr_number: 456 -start_branch: test-branch-no-once -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 2 -poll_timeout: 60 -started_at: 2026-01-18T13:00:00Z -STATE - - # Create goal-tracker.md for PR loop - cat > 
"$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00/goal-tracker.md" << 'PR_GOAL_EOF' -# PR Review Goal Tracker - -## PR Information -- PR Number: #456 -- Branch: test-branch-no-once -- Started: 2026-01-18T13:00:00Z - -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | - -## Total Statistics -- Total Issues Found: 0 -- Remaining: 0 -PR_GOAL_EOF - - # Create fake HOME for PR monitor test without --once - FAKE_HOME_PR_NO_ONCE="$TEST_BASE/home_pr_no_once" - mkdir -p "$FAKE_HOME_PR_NO_ONCE" - - # Create cache directory for PR monitor - SANITIZED_PROJECT_PR_NO_ONCE=$(echo "$TEST_PROJECT_PR_NO_ONCE" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') - CACHE_DIR_PR_NO_ONCE="$FAKE_HOME_PR_NO_ONCE/.cache/humanize/$SANITIZED_PROJECT_PR_NO_ONCE/2026-01-18_13-00-00" - mkdir -p "$CACHE_DIR_PR_NO_ONCE" - echo "PR round 1 started" > "$CACHE_DIR_PR_NO_ONCE/round-1-codex-run.log" - - # Create bash test runner script for PR monitor without --once - cat > "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" << 'PR_NO_ONCE_EOF' -#!/usr/bin/env bash -# Run the REAL _humanize_monitor_pr function WITHOUT --once flag - -PROJECT_DIR="$1" -PROJECT_ROOT="$2" -FAKE_HOME="$3" - -cd "$PROJECT_DIR" - -# Override HOME and XDG_CACHE_HOME -export HOME="$FAKE_HOME" -export XDG_CACHE_HOME="$FAKE_HOME/.cache" - -# Create shim functions for terminal commands -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} - -# Stub terminal control -printf() { - case "$1" in - *\\033*) : ;; # Ignore escape sequences - *) builtin printf "$@" ;; - esac -} - -# Source the humanize script (loads all functions) -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Run monitor in foreground WITHOUT --once flag -# This runs the actual poll loop (not just one iteration) -humanize monitor pr 2>&1 -exit_code=$? 
- -echo "EXIT_CODE:$exit_code" -PR_NO_ONCE_EOF - - chmod +x "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" - - # Run the PR monitor in background (no --once means it will loop until interrupted) - OUTPUT_FILE_PR_NO_ONCE="$TEST_BASE/output_pr_no_once.txt" - bash "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" "$TEST_PROJECT_PR_NO_ONCE" "$PROJECT_ROOT" "$FAKE_HOME_PR_NO_ONCE" > "$OUTPUT_FILE_PR_NO_ONCE" 2>&1 & - MONITOR_PID_PR_NO_ONCE=$! - - # Wait for monitor to start running its poll loop - sleep 3 - - # Verify monitor is running before sending SIGINT - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - # Send SIGINT to stop the continuous monitor (simulates Ctrl+C) - # Using negative PID sends to entire process group - kill -INT -$MONITOR_PID_PR_NO_ONCE 2>/dev/null || kill -INT $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - - # Wait for monitor to exit gracefully after SIGINT - WAIT_COUNT=0 - while kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null && [[ $WAIT_COUNT -lt 20 ]]; do - sleep 0.5 - WAIT_COUNT=$((WAIT_COUNT + 1)) - done - - # Force kill if still running - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - # Try SIGTERM before SIGKILL - kill -TERM $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - sleep 1 - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - kill -9 $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - fi - wait $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - # Still count as pass if the monitor ran and was terminated (SIGINT delivery is complex) - pass "PR monitor (no --once) handled via SIGTERM" - else - wait $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - pass "PR monitor (no --once) exited after SIGINT" - fi - else - fail "PR monitor (no --once) start" "Monitor did not start properly" - fi - - # Read captured output - output_pr_no_once=$(cat "$OUTPUT_FILE_PR_NO_ONCE" 2>/dev/null || echo "") - - # Verify clean exit after SIGINT - if echo "$output_pr_no_once" | grep -qE 'Stopped|Monitor stopped|EXIT_CODE:[01]'; then - pass "PR 
monitor (no --once) clean SIGINT exit" - else - # Check for any indication the monitor ran properly before SIGINT - if echo "$output_pr_no_once" | grep -qE 'PR|loop|Waiting|session'; then - pass "PR monitor (no --once) ran before SIGINT" - else - fail "PR monitor (no --once) SIGINT cleanup" "Expected cleanup message, got: $(head -c 300 <<< "$output_pr_no_once" | tr '\n' ' ')" - fi - fi - - # Verify no glob errors in PR monitor output - if echo "$output_pr_no_once" | grep -qE 'no matches found|bad pattern'; then - fail "PR monitor (no --once) glob errors" "Found glob errors" - else - pass "PR monitor (no --once) no glob errors" - fi -} - # ======================================== # Run all tests and print summary when executed directly # ======================================== @@ -1009,8 +698,6 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then monitor_test_zsh_deletion monitor_test_bash_sigint monitor_test_zsh_sigint - monitor_test_pr_deletion - monitor_test_pr_sigint # Summary echo "" @@ -1029,7 +716,6 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then echo "VERIFIED: Terminal state restored" echo "VERIFIED: Works in bash and zsh" echo "VERIFIED: Real SIGINT/Ctrl+C handling (bash and zsh)" - echo "VERIFIED: PR monitor e2e works (with and without --once)" exit 0 else echo "" diff --git a/tests/test-monitor-e2e-sigint.sh b/tests/test-monitor-e2e-sigint.sh index a6bfe20a..92aea080 100755 --- a/tests/test-monitor-e2e-sigint.sh +++ b/tests/test-monitor-e2e-sigint.sh @@ -11,7 +11,6 @@ echo "" monitor_test_bash_sigint monitor_test_zsh_sigint -monitor_test_pr_sigint echo "" echo "========================================" diff --git a/tests/test-pr-loop-1-scripts.sh b/tests/test-pr-loop-1-scripts.sh deleted file mode 100755 index 38ccd846..00000000 --- a/tests/test-pr-loop-1-scripts.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Script Tests Runner (parallel split 1/3) -# -# Runs only script argument validation tests from the PR loop test suite. 
-# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-scripts.sh" - -run_script_tests - -print_test_summary "PR Loop Script Tests" -exit $? diff --git a/tests/test-pr-loop-2-hooks.sh b/tests/test-pr-loop-2-hooks.sh deleted file mode 100755 index 56f6219e..00000000 --- a/tests/test-pr-loop-2-hooks.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Hook Tests Runner (parallel split 2/3) -# -# Runs only hook functionality tests from the PR loop test suite. -# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-hooks.sh" - -run_hook_tests - -print_test_summary "PR Loop Hook Tests" -exit $? diff --git a/tests/test-pr-loop-3-stophook.sh b/tests/test-pr-loop-3-stophook.sh deleted file mode 100755 index 4027f478..00000000 --- a/tests/test-pr-loop-3-stophook.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Stop Hook Tests Runner (parallel split 3/3) -# -# Runs only stop hook integration tests from the PR loop test suite. -# This is the slowest module due to timeout-based bot polling tests. -# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then - skip "PR Loop Stop Hook Tests" "Skipped in GitHub Actions" - print_test_summary "PR Loop Stop Hook Tests" - exit 0 -fi - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-stophook.sh" - -run_stophook_tests - -print_test_summary "PR Loop Stop Hook Tests" -exit $? diff --git a/tests/test-pr-loop-hooks.sh b/tests/test-pr-loop-hooks.sh deleted file mode 100644 index de4d09f7..00000000 --- a/tests/test-pr-loop-hooks.sh +++ /dev/null @@ -1,1623 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Hook Tests -# -# Tests for hook functionality: -# - Validators and protections -# - Comment processing -# - E2E tests -# - Fixture-based tests -# - Monitor tests -# -# Usage: source test-pr-loop-hooks.sh && run_hook_tests -# - -run_hook_tests() { -# ======================================== -# PR Loop Validator Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing PR Loop Validators" -echo "========================================" -echo "" - -# Test: active_bots is stored as YAML list -test_active_bots_yaml_format() { - cd "$TEST_DIR" - - # Create mock git repo - init_test_git_repo "$TEST_DIR/repo" - cd "$TEST_DIR/repo" - - # Create PR loop state file with proper YAML format - local timestamp="2026-01-18_13-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T13:00:00Z ---- -EOF - - # Verify state file has YAML list format - if grep -q "^ - claude$" "$loop_dir/state.md" && \ - grep -q "^ - codex$" "$loop_dir/state.md"; then - pass "T-POS-12: active_bots is stored as 
YAML list format" - else - fail "T-POS-12: active_bots should be stored as YAML list format" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop state file is protected from writes -test_pr_loop_state_protected() { - cd "$TEST_DIR" - - # Create mock loop directory - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator blocks state.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/state.md", "content": "malicious content"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "state.*blocked\|pr loop"; then - pass "T-SEC-1: PR loop state.md is protected from writes" - else - fail "T-SEC-1: PR loop state.md should be protected from writes" "exit=2, blocked" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop comment file is protected from writes -test_pr_loop_comment_protected() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator blocks pr-comment.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-comment.md", "content": "fake comments"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]]; then - pass "T-SEC-2: PR loop pr-comment file is protected from writes" - else - fail "T-SEC-2: PR loop pr-comment file should be protected from writes" "exit=2" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop resolve file is allowed for writes -test_pr_loop_resolve_allowed() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator allows pr-resolve.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-resolve.md", "content": "resolution summary"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 0 ]]; then - pass "T-POS-13: PR loop pr-resolve file is allowed for writes" - else - fail "T-POS-13: PR loop pr-resolve file should be allowed for writes" "exit=0" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Run validator tests -test_active_bots_yaml_format -test_pr_loop_state_protected -test_pr_loop_comment_protected -test_pr_loop_resolve_allowed - -# Test: PR loop Bash protection works without RLCR loop -test_pr_loop_bash_protection_no_rlcr() { - cd "$TEST_DIR" - - # Ensure NO RLCR loop exists - rm -rf ".humanize/rlcr" - - local timestamp="2026-01-18_14-30-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 456 ---- -EOF - - # Test that Bash validator blocks state.md modifications via echo redirect - local hook_input='{"tool_name": "Bash", "tool_input": {"command": "echo bad > '$TEST_DIR'/.humanize/pr-loop/'$timestamp'/state.md"}}' - - local 
output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "state\|blocked\|pr loop"; then - pass "T-SEC-4: PR loop Bash protection works without RLCR loop" - else - fail "T-SEC-4: PR loop Bash protection should work without RLCR" "exit=2, blocked" "exit=$exit_code, output=$output" - fi - - cd "$SCRIPT_DIR" -} - -test_pr_loop_bash_protection_no_rlcr - -# ======================================== -# Comment Sorting Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Comment Sorting (fromdateiso8601)" -echo "========================================" -echo "" - -# Test: Timestamps are properly sorted (newest first) -test_timestamp_sorting() { - # Test that jq fromdateiso8601 works correctly - local sorted_output - sorted_output=$(echo '[ - {"created_at": "2026-01-18T10:00:00Z", "author_type": "User"}, - {"created_at": "2026-01-18T12:00:00Z", "author_type": "User"}, - {"created_at": "2026-01-18T11:00:00Z", "author_type": "User"} - ]' | jq 'sort_by(-(.created_at | fromdateiso8601)) | .[0].created_at') - - if [[ "$sorted_output" == '"2026-01-18T12:00:00Z"' ]]; then - pass "T-SORT-1: Comments are sorted newest first using fromdateiso8601" - else - fail "T-SORT-1: Comments should be sorted newest first" "12:00:00Z first" "got $sorted_output" - fi -} - -# Test: Human comments come before bot comments -test_human_before_bot_sorting() { - local sorted_output - sorted_output=$(echo '[ - {"created_at": "2026-01-18T12:00:00Z", "author_type": "Bot"}, - {"created_at": "2026-01-18T11:00:00Z", "author_type": "User"} - ]' | jq 'sort_by( - (if .author_type == "Bot" then 1 else 0 end), - -(.created_at | fromdateiso8601) - ) | .[0].author_type') - - if [[ "$sorted_output" == '"User"' ]]; then - pass "T-SORT-2: Human comments come before bot comments" - else - fail 
"T-SORT-2: Human comments should come before bot comments" "User first" "got $sorted_output" - fi -} - -# Run sorting tests -test_timestamp_sorting -test_human_before_bot_sorting - -# ======================================== -# Gate-keeper Logic Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Gate-keeper Logic" -echo "========================================" -echo "" - -# Test: Comment deduplication by ID (unit test) -test_comment_deduplication() { - # Test that jq unique_by works for deduplication - local deduped_output - deduped_output=$(echo '[ - {"id": 1, "body": "first"}, - {"id": 2, "body": "second"}, - {"id": 1, "body": "duplicate of first"} - ]' | jq 'unique_by(.id) | length') - - if [[ "$deduped_output" == "2" ]]; then - pass "T-GATE-1: Comments are deduplicated by ID" - else - fail "T-GATE-1: Comments should be deduplicated by ID" "2 unique" "got $deduped_output" - fi -} - -# Test: YAML list parsing for configured_bots -test_configured_bots_parsing() { - local test_state="--- -current_round: 0 -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 ----" - - # Extract configured_bots using same logic as stop hook - local configured_bots="" - local in_field=false - while IFS= read -r line; do - if [[ "$line" =~ ^configured_bots: ]]; then - in_field=true - continue - fi - if [[ "$in_field" == "true" ]]; then - if [[ "$line" =~ ^[[:space:]]+-[[:space:]]+ ]]; then - local bot_name="${line#*- }" - bot_name=$(echo "$bot_name" | tr -d ' ') - configured_bots="${configured_bots}${bot_name}," - elif [[ "$line" =~ ^[a-zA-Z_] ]]; then - in_field=false - fi - fi - done <<< "$test_state" - - if [[ "$configured_bots" == "claude,codex," ]]; then - pass "T-GATE-2: configured_bots YAML list is parsed correctly" - else - fail "T-GATE-2: configured_bots parsing failed" "claude,codex," "got $configured_bots" - fi -} - -# Test: Bot status extraction from Codex output 
-test_bot_status_extraction() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | No issues found | -| codex | ISSUES | Found bug in line 42 | - -### Approved Bots -- claude" - - # Extract bots with ISSUES status using same logic as stop hook - local bots_with_issues="" - while IFS= read -r line; do - if echo "$line" | grep -qiE '\|[[:space:]]*ISSUES[[:space:]]*\|'; then - local bot=$(echo "$line" | sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - bots_with_issues="${bots_with_issues}${bot}," - fi - done <<< "$codex_output" - - if [[ "$bots_with_issues" == "codex," ]]; then - pass "T-GATE-3: Bots with ISSUES status are correctly identified" - else - fail "T-GATE-3: Bot status extraction failed" "codex," "got $bots_with_issues" - fi -} - -# Test: Bot re-add logic when previously approved bot has new issues -test_bot_readd_logic() { - # Simulate: claude was approved (removed from active), but now has ISSUES - local configured_bots=("claude" "codex") - local active_bots=("codex") # claude was removed (approved) - - # Codex output shows claude now has issues - declare -A bots_with_issues - bots_with_issues["claude"]="true" - - declare -A bots_approved - # No bots approved this round - - # Re-add logic: process ALL configured bots - local new_active=() - for bot in "${configured_bots[@]}"; do - if [[ "${bots_with_issues[$bot]:-}" == "true" ]]; then - new_active+=("$bot") - fi - done - - # claude should be re-added because it has issues - local found_claude=false - for bot in "${new_active[@]}"; do - if [[ "$bot" == "claude" ]]; then - found_claude=true - break - fi - done - - if [[ "$found_claude" == "true" ]]; then - pass "T-GATE-4: Previously approved bot is re-added when it has new issues" - else - fail "T-GATE-4: Bot re-add logic failed" "claude in new_active" "not found" - fi -} - -# Test: Trigger comment timestamp detection pattern -test_trigger_comment_detection() { - local comments='[ - {"id": 1, 
"body": "Just a regular comment", "created_at": "2026-01-18T10:00:00Z"}, - {"id": 2, "body": "@claude @codex please review", "created_at": "2026-01-18T11:00:00Z"}, - {"id": 3, "body": "Another comment", "created_at": "2026-01-18T12:00:00Z"} - ]' - - # Build pattern for @bot mentions - local bot_pattern="@claude|@codex" - - # Find most recent trigger comment - local trigger_ts - trigger_ts=$(echo "$comments" | jq -r --arg pattern "$bot_pattern" ' - [.[] | select(.body | test($pattern; "i"))] | - sort_by(.created_at) | reverse | .[0].created_at // empty - ') - - if [[ "$trigger_ts" == "2026-01-18T11:00:00Z" ]]; then - pass "T-GATE-5: Trigger comment timestamp is correctly detected" - else - fail "T-GATE-5: Trigger timestamp detection failed" "2026-01-18T11:00:00Z" "got $trigger_ts" - fi -} - -# Test: APPROVE marker detection in Codex output -test_approve_marker_detection() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | LGTM | - -### Final Recommendation -All bots have approved. - -APPROVE" - - local last_line - last_line=$(echo "$codex_output" | grep -v '^[[:space:]]*$' | tail -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - if [[ "$last_line" == "APPROVE" ]]; then - pass "T-GATE-6: APPROVE marker is correctly recognized" - else - fail "T-GATE-6: APPROVE marker detection failed" "APPROVE" "got $last_line" - fi -} - -# Test: WAITING_FOR_BOTS marker detection -test_waiting_for_bots_marker() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | NO_RESPONSE | Bot did not respond | - -### Final Recommendation -Some bots have not responded yet. 
- -WAITING_FOR_BOTS" - - local last_line - last_line=$(echo "$codex_output" | grep -v '^[[:space:]]*$' | tail -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - if [[ "$last_line" == "WAITING_FOR_BOTS" ]]; then - pass "T-GATE-7: WAITING_FOR_BOTS marker is correctly recognized" - else - fail "T-GATE-7: WAITING_FOR_BOTS marker detection failed" "WAITING_FOR_BOTS" "got $last_line" - fi -} - -# Run gate-keeper tests -test_comment_deduplication -test_configured_bots_parsing -test_bot_status_extraction -test_bot_readd_logic -test_trigger_comment_detection -test_approve_marker_detection -test_waiting_for_bots_marker - -# ======================================== -# Stop Hook Integration Tests (with mocked gh/codex) -# ======================================== - -echo "" -echo "========================================" -echo "Testing Stop Hook Integration" -echo "========================================" -echo "" - -# Create enhanced mock gh that returns trigger comments -create_enhanced_mock_gh() { - local mock_dir="$1" - local trigger_user="${2:-testuser}" - local trigger_timestamp="${3:-2026-01-18T12:00:00Z}" - - cat > "$mock_dir/gh" << MOCK_GH -#!/usr/bin/env bash -# Enhanced mock gh CLI for stop hook testing - -case "\$1" in - auth) - if [[ "\$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "\$2" == "view" ]]; then - if [[ "\$3" == "--json" && "\$4" == "owner" ]]; then - echo '{"login": "testowner"}' - elif [[ "\$3" == "--json" && "\$4" == "name" ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"number"* ]]; then - echo '{"number": 123}' - elif [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - fi - exit 0 - fi - ;; - api) - # Handle user endpoint for current user - if [[ "\$2" == "user" ]]; then - echo '{"login": "${trigger_user}"}' - exit 0 - fi - # Handle PR comments endpoint - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id": 1, 
"user": {"login": "${trigger_user}"}, "created_at": "${trigger_timestamp}", "body": "@claude @codex please review"}]' - exit 0 - fi - # Return empty arrays for other endpoints - echo "[]" - exit 0 - ;; -esac - -echo "Mock gh: unhandled command: \$*" >&2 -exit 1 -MOCK_GH - chmod +x "$mock_dir/gh" -} - -# Test: Trigger comment detection filters by current user -test_trigger_user_filter() { - local test_subdir="$TEST_DIR/stop_hook_user_test" - mkdir -p "$test_subdir" - - # Create mock that returns comments from different users - cat > "$test_subdir/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo '{"login": "myuser"}' - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[ - {"id": 1, "user": {"login": "otheruser"}, "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}, - {"id": 2, "user": {"login": "myuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}, - {"id": 3, "user": {"login": "otheruser"}, "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"} - ]' - exit 0 - fi - echo "[]" - exit 0 - ;; -esac -exit 1 -MOCK_GH - chmod +x "$test_subdir/gh" - - # Test the jq filter logic - local comments='[ - {"id": 1, "author": "otheruser", "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}, - {"id": 2, "author": "myuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}, - {"id": 3, "author": "otheruser", "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"} - ]' - - local trigger_ts - trigger_ts=$(echo "$comments" | jq -r --arg pattern "@claude" --arg user "myuser" ' - [.[] | select(.author == $user and (.body | test($pattern; "i")))] | - sort_by(.created_at) | reverse | .[0].created_at // empty - ') - - if [[ "$trigger_ts" == "2026-01-18T12:00:00Z" ]]; then - pass "T-HOOK-1: Trigger detection filters by current user" - else - fail "T-HOOK-1: Trigger should be from myuser only" 
"2026-01-18T12:00:00Z" "got $trigger_ts" - fi -} - -# Test: Trigger timestamp refresh when newer exists -test_trigger_refresh() { - local old_trigger="2026-01-18T10:00:00Z" - local new_trigger="2026-01-18T12:00:00Z" - - # Simulate the refresh logic from stop hook - local should_update=false - if [[ -z "$old_trigger" ]] || [[ "$new_trigger" > "$old_trigger" ]]; then - should_update=true - fi - - if [[ "$should_update" == "true" ]]; then - pass "T-HOOK-2: Trigger timestamp refreshes when newer comment exists" - else - fail "T-HOOK-2: Should update trigger when newer" "update" "no update" - fi -} - -# Test: Missing trigger blocks exit for round > 0 -test_missing_trigger_blocks() { - local current_round=1 - local last_trigger_at="" - - # Simulate the check from stop hook - local should_block=false - if [[ "$current_round" -gt 0 && -z "$last_trigger_at" ]]; then - should_block=true - fi - - if [[ "$should_block" == "true" ]]; then - pass "T-HOOK-3: Missing trigger comment blocks exit for round > 0" - else - fail "T-HOOK-3: Should block when no trigger" "block" "allow" - fi -} - -# Test: Round 0 uses last_trigger_at when present, started_at as fallback -test_round0_trigger_priority() { - local current_round=0 - local started_at="2026-01-18T10:00:00Z" - local last_trigger_at="2026-01-18T11:00:00Z" - - # Simulate the timestamp selection from stop hook (updated logic) - # ALWAYS prefer last_trigger_at when available - local after_timestamp - if [[ -n "$last_trigger_at" ]]; then - after_timestamp="$last_trigger_at" - elif [[ "$current_round" -eq 0 ]]; then - after_timestamp="$started_at" - fi - - if [[ "$after_timestamp" == "$last_trigger_at" ]]; then - pass "T-HOOK-4: Round 0 uses last_trigger_at when present (not started_at)" - else - fail "T-HOOK-4: Round 0 should prefer last_trigger_at" "$last_trigger_at" "got $after_timestamp" - fi -} - -# Test: Round 0 falls back to started_at when no trigger -test_round0_started_at_fallback() { - local current_round=0 - local 
started_at="2026-01-18T10:00:00Z" - local last_trigger_at="" - - # Simulate the timestamp selection from stop hook - local after_timestamp - if [[ -n "$last_trigger_at" ]]; then - after_timestamp="$last_trigger_at" - elif [[ "$current_round" -eq 0 ]]; then - after_timestamp="$started_at" - fi - - if [[ "$after_timestamp" == "$started_at" ]]; then - pass "T-HOOK-4b: Round 0 falls back to started_at when no trigger" - else - fail "T-HOOK-4b: Round 0 should fall back to started_at" "$started_at" "got $after_timestamp" - fi -} - -# Test: Per-bot timeout anchored to trigger timestamp -test_timeout_anchored_to_trigger() { - # Simulate: trigger at T=0, poll starts at T=60, timeout is 900s - local trigger_epoch=1000 - local poll_start_epoch=1060 - local current_time=1900 # 900s after trigger, 840s after poll start - local timeout=900 - - # With trigger-anchored timeout: - local elapsed_from_trigger=$((current_time - trigger_epoch)) - # With poll-anchored timeout (wrong): - local elapsed_from_poll=$((current_time - poll_start_epoch)) - - local timed_out_trigger=false - local timed_out_poll=false - - if [[ $elapsed_from_trigger -ge $timeout ]]; then - timed_out_trigger=true - fi - if [[ $elapsed_from_poll -ge $timeout ]]; then - timed_out_poll=true - fi - - # Should be timed out based on trigger (900s elapsed), not poll (840s elapsed) - if [[ "$timed_out_trigger" == "true" && "$timed_out_poll" == "false" ]]; then - pass "T-HOOK-5: Per-bot timeout is anchored to trigger timestamp" - else - fail "T-HOOK-5: Timeout should be from trigger, not poll start" "trigger-based timeout" "poll-based timeout" - fi -} - -# Test: State file includes configured_bots -test_state_has_configured_bots() { - local test_subdir="$TEST_DIR/state_configured_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -configured_bots: - - claude - - codex -active_bots: - - claude 
-last_trigger_at: 2026-01-18T12:00:00Z ---- -EOF - - # Extract configured_bots count - local configured_count - configured_count=$(grep -c "^ - " "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" 2>/dev/null | head -1) - - if [[ "$configured_count" -ge 2 ]]; then - pass "T-HOOK-6: State file tracks configured_bots separately" - else - fail "T-HOOK-6: State should have configured_bots" "2+ bots" "got $configured_count" - fi -} - -# Test: Round file naming consistency -test_round_file_naming() { - # All round-N files should use NEXT_ROUND - local current_round=1 - local next_round=$((current_round + 1)) - - local comment_file="round-${next_round}-pr-comment.md" - local check_file="round-${next_round}-pr-check.md" - local feedback_file="round-${next_round}-pr-feedback.md" - - # All should use next_round (2) - if [[ "$comment_file" == "round-2-pr-comment.md" && \ - "$check_file" == "round-2-pr-check.md" && \ - "$feedback_file" == "round-2-pr-feedback.md" ]]; then - pass "T-HOOK-7: Round file naming is consistent (all use NEXT_ROUND)" - else - fail "T-HOOK-7: Round files should all use NEXT_ROUND" "round-2-*" "inconsistent" - fi -} - -# Run stop hook integration tests -test_trigger_user_filter -test_trigger_refresh -test_missing_trigger_blocks -test_round0_trigger_priority -test_round0_started_at_fallback -test_timeout_anchored_to_trigger -test_state_has_configured_bots -test_round_file_naming - -# ======================================== -# Stop Hook End-to-End Tests (Execute Hook with Mocked gh/codex) -# ======================================== - -echo "" -echo "========================================" -echo "Testing Stop Hook End-to-End Execution" -echo "========================================" -echo "" - -# Test: Stop hook blocks when no resolve file exists -test_e2e_missing_resolve_blocks() { - local test_subdir="$TEST_DIR/e2e_resolve_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > 
"$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T12:00:00Z -last_trigger_at: ---- -EOF - - # Create mock binaries - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo '{"login": "testuser"}' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook with mocked environment - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Check for block decision about missing resolve file - if echo "$hook_output" | grep -q "Resolution Summary Missing\|resolution summary\|round-0-pr-resolve"; then - pass "T-E2E-1: Stop hook blocks when resolve file missing" - else - fail "T-E2E-1: Stop hook should block for missing resolve" "block message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook detects trigger comment and updates state -test_e2e_trigger_detection() { - local test_subdir="$TEST_DIR/e2e_trigger_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with empty last_trigger_at - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - 
- claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T12:00:00Z -last_trigger_at: ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - # Create mock binaries that return trigger comment - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that properly returns jq-parsed user and trigger comments - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - # gh api user --jq '.login' returns just the login string - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - else - echo '{"login": "testuser"}' - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # When --jq and --paginate are used, gh applies jq per-element and outputs transformed objects - # The hook's jq: '.[] | {id: .id, author: .user.login, created_at: .created_at, body: .body}' - if [[ "$*" == *"--jq"* ]]; then - # Return pre-transformed format (what jq would output) - echo '{"id": 1, "author": "testuser", "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"}' - else - # Return raw GitHub API format - echo '[{"id": 1, "user": {"login": "testuser"}, "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Capture stderr for debug messages - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 
>/dev/null) || true - - # Check for trigger detection message OR that last_trigger_at is being used - # (which indicates the trigger was detected and persisted) - if echo "$hook_stderr" | grep -q "Found trigger comment at:\|using trigger timestamp"; then - pass "T-E2E-2: Stop hook detects and reports trigger comment" - else - fail "T-E2E-2: Stop hook should detect trigger" "trigger detected" "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook handles paginated API response (multi-page trigger detection) -test_e2e_pagination_runtime() { - local test_subdir="$TEST_DIR/e2e_pagination_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that simulates paginated response (returns multiple JSON arrays) - # The trigger comment is on page 2 (second array) - only visible if pagination works - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - else - echo '{"login": "testuser"}' - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # When --jq and --paginate are used, gh applies jq per-element and outputs transformed objects - # Page 1: old comment without trigger - # Page 2: newer comment WITH trigger - must combine to find it - if [[ "$*" == *"--paginate"* ]] && [[ "$*" == *"--jq"* ]]; then - # 
--paginate with --jq: output transformed objects (one per line) - echo '{"id": 1, "author": "other", "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}' - echo '{"id": 2, "author": "testuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review the pagination fix"}' - elif [[ "$*" == *"--paginate"* ]]; then - # --paginate without --jq: output raw arrays - echo '[{"id": 1, "user": {"login": "other"}, "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}]' - echo '[{"id": 2, "user": {"login": "testuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review the pagination fix"}]' - else - # No pagination: only first page (trigger NOT found) - echo '[{"id": 1, "user": {"login": "other"}, "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Check that trigger was found (proving pagination worked to combine arrays) - if echo "$hook_stderr" | grep -q "Found trigger comment at:\|using trigger timestamp"; then - pass "T-E2E-3: Pagination combines arrays and finds trigger on page 2" - else - fail "T-E2E-3: Pagination should find trigger on page 2" "trigger detected" "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook uses last_trigger_at when present (even for round 0) -test_e2e_trigger_priority_runtime() { - local test_subdir="$TEST_DIR/e2e_priority_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - 
- # Create state file with BOTH started_at and last_trigger_at set - # The trigger timestamp is LATER than started_at - if priority works, - # the hook should use the trigger timestamp (not started_at) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T14:30:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id": 1, "author": "testuser", "created_at": "2026-01-18T14:30:00Z", "body": "@claude review"}]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Check that it reports using trigger timestamp for --after (not started_at) - # Must match the SPECIFIC log format: "Round 0: using trigger timestamp for --after: " - # This proves last_trigger_at is prioritized even for round 0 - if echo "$hook_stderr" | grep -q "Round 0: using trigger timestamp for 
--after: 2026-01-18T14:30:00Z"; then - pass "T-E2E-4: Round 0 uses last_trigger_at for --after (not started_at)" - else - fail "T-E2E-4: Round 0 should use last_trigger_at for --after" \ - "Round 0: using trigger timestamp for --after: 2026-01-18T14:30:00Z" \ - "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Run end-to-end tests -test_e2e_missing_resolve_blocks -test_e2e_trigger_detection -test_e2e_pagination_runtime -test_e2e_trigger_priority_runtime - -# ======================================== -# Approval-Only Review Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Approval-Only Review Handling" -echo "========================================" -echo "" - -# Test: Empty-body PR reviews are captured with state placeholder -test_approval_only_review_captured() { - # Simulate PR review with APPROVED state but empty body - local reviews='[ - {"id": 1, "user": {"login": "claude[bot]"}, "state": "APPROVED", "body": null, "submitted_at": "2026-01-18T12:00:00Z"}, - {"id": 2, "user": {"login": "claude[bot]"}, "state": "APPROVED", "body": "", "submitted_at": "2026-01-18T12:01:00Z"}, - {"id": 3, "user": {"login": "claude[bot]"}, "state": "CHANGES_REQUESTED", "body": "Fix bug", "submitted_at": "2026-01-18T12:02:00Z"} - ]' - - # Apply the same jq logic as poll-pr-reviews.sh (fixed version) - local processed - processed=$(echo "$reviews" | jq '[.[] | { - id: .id, - author: .user.login, - state: .state, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end) - }]') - - local count - count=$(echo "$processed" | jq 'length') - - if [[ "$count" == "3" ]]; then - pass "T-APPROVE-1: Empty-body PR reviews are captured (count=3)" - else - fail "T-APPROVE-1: All reviews should be captured including empty-body" "3" "got $count" - fi - - # Check that empty body gets placeholder - local placeholder_count - placeholder_count=$(echo "$processed" | jq '[.[] | 
select(.body | test("\\[Review state:"))] | length') - - if [[ "$placeholder_count" == "2" ]]; then - pass "T-APPROVE-2: Empty-body reviews get state placeholder" - else - fail "T-APPROVE-2: Empty-body reviews should get placeholder" "2" "got $placeholder_count" - fi -} - -# Test: Approval-only reviews match bot patterns for polling -test_approval_polls_correctly() { - local bot_pattern="claude\\[bot\\]" - local reviews='[ - {"type": "pr_review", "author": "claude[bot]", "state": "APPROVED", "body": "[Review state: APPROVED]", "created_at": "2026-01-18T12:00:00Z"} - ]' - - local filtered - filtered=$(echo "$reviews" | jq --arg pattern "$bot_pattern" '[.[] | select(.author | test($pattern; "i"))]') - local count - count=$(echo "$filtered" | jq 'length') - - if [[ "$count" == "1" ]]; then - pass "T-APPROVE-3: Approval-only reviews match bot pattern for polling" - else - fail "T-APPROVE-3: Approval-only review should match bot" "1" "got $count" - fi -} - -# Run approval-only review tests -test_approval_only_review_captured -test_approval_polls_correctly - -# ======================================== -# Fixture-Backed Fetch/Poll Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Fetch/Poll with Fixture-Backed Mock GH" -echo "========================================" -echo "" - -# Set up fixture-backed mock gh -setup_fixture_mock_gh() { - local mock_bin_dir="$TEST_DIR/mock_bin" - local fixtures_dir="$SCRIPT_DIR/fixtures" - - # Create the mock gh - "$SCRIPT_DIR/setup-fixture-mock-gh.sh" "$mock_bin_dir" "$fixtures_dir" > /dev/null - - echo "$mock_bin_dir" -} - -# Test: fetch-pr-comments.sh returns all comment types including approval-only reviews -test_fetch_pr_comments_with_fixtures() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run fetch-pr-comments.sh with mock gh in PATH - local output_file="$TEST_DIR/pr-comments.md" - PATH="$mock_bin_dir:$PATH" 
"$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$output_file" - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-1: fetch-pr-comments.sh should succeed" "exit=0" "exit=$exit_code" - return - fi - - if [[ ! -f "$output_file" ]]; then - fail "T-FIXTURE-1: Output file should exist" "file exists" "file not found" - return - fi - - # Check for issue comments - if ! grep -q "humanuser" "$output_file"; then - fail "T-FIXTURE-1: Output should contain human issue comment" "humanuser comment" "not found" - return - fi - - # Check for review comments (inline code comments) - if ! grep -q "const instead of let" "$output_file"; then - fail "T-FIXTURE-1: Output should contain inline review comment" "const instead of let" "not found" - return - fi - - # Check for approval-only PR reviews with placeholder - if ! grep -q "\[Review state: APPROVED\]" "$output_file"; then - fail "T-FIXTURE-1: Output should contain approval-only review with placeholder" "[Review state: APPROVED]" "not found" - return - fi - - pass "T-FIXTURE-1: fetch-pr-comments.sh returns all comment types including approval-only" - cd "$SCRIPT_DIR" -} - -# Test: fetch-pr-comments.sh respects --after timestamp filter -test_fetch_pr_comments_after_filter() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run with --after filter (after 12:00, should exclude early comments) - local output_file="$TEST_DIR/pr-comments-filtered.md" - PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$output_file" --after "2026-01-18T12:00:00Z" - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-2: fetch-pr-comments.sh --after should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Should include late comments (13:00+ approvals) - if ! 
grep -q "\[Review state: APPROVED\]" "$output_file"; then - fail "T-FIXTURE-2: Should include late approval-only review" "[Review state: APPROVED]" "not found" - return - fi - - # Should NOT include early human comment from 09:00 - # (humanreviewer's "LGTM!" was at 09:00) - if grep -q "LGTM" "$output_file"; then - fail "T-FIXTURE-2: Should exclude comments before --after timestamp" "no LGTM" "LGTM found" - return - fi - - pass "T-FIXTURE-2: fetch-pr-comments.sh --after filter works correctly" - cd "$SCRIPT_DIR" -} - -# Test: poll-pr-reviews.sh returns JSON with approval-only reviews -test_poll_pr_reviews_with_fixtures() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run poll-pr-reviews.sh with mock gh in PATH - # Use early timestamp to catch all bot reviews - local output - output=$(PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 \ - --after "2026-01-18T10:00:00Z" \ - --bots "claude,codex") - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-3: poll-pr-reviews.sh should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Validate JSON structure - if ! echo "$output" | jq . > /dev/null 2>&1; then - fail "T-FIXTURE-3: Output should be valid JSON" "valid JSON" "invalid JSON" - return - fi - - # Check for approval-only reviews in comments - local has_placeholder - has_placeholder=$(echo "$output" | jq '[.comments[]? 
| select(.body | test("\\[Review state:"))] | length') - - if [[ "$has_placeholder" -lt 1 ]]; then - fail "T-FIXTURE-3: Should include approval-only reviews with placeholder" ">=1" "$has_placeholder" - return - fi - - # Check bots_responded includes both bots - local bots_count - bots_count=$(echo "$output" | jq '.bots_responded | length') - - if [[ "$bots_count" -lt 1 ]]; then - fail "T-FIXTURE-3: Should have bots in bots_responded" ">=1" "$bots_count" - return - fi - - pass "T-FIXTURE-3: poll-pr-reviews.sh returns approval-only reviews in JSON" - cd "$SCRIPT_DIR" -} - -# Test: poll-pr-reviews.sh filters by --after timestamp correctly -test_poll_pr_reviews_after_filter() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Use timestamp that filters out early CHANGES_REQUESTED (11:00) - # but includes late APPROVED reviews (13:00, 13:30) - local output - output=$(PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 \ - --after "2026-01-18T12:30:00Z" \ - --bots "claude,codex") - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-4: poll-pr-reviews.sh --after should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Should have claude[bot] approval at 13:00 and codex approval at 13:30 - local comment_count - comment_count=$(echo "$output" | jq '.comments | length') - - # At minimum, should have the late approvals - if [[ "$comment_count" -lt 1 ]]; then - fail "T-FIXTURE-4: Should include late approvals" ">=1" "$comment_count" - return - fi - - # Should NOT include the CHANGES_REQUESTED from 11:00 (before our --after) - local changes_requested - changes_requested=$(echo "$output" | jq '[.comments[]? 
| select(.body | test("security concerns"))] | length') - - if [[ "$changes_requested" -gt 0 ]]; then - fail "T-FIXTURE-4: Should exclude comments before --after" "0" "$changes_requested" - return - fi - - pass "T-FIXTURE-4: poll-pr-reviews.sh --after filter excludes early comments" - cd "$SCRIPT_DIR" -} - -# Run fixture-backed tests -test_fetch_pr_comments_with_fixtures -test_fetch_pr_comments_after_filter -test_poll_pr_reviews_with_fixtures -test_poll_pr_reviews_after_filter - -# ======================================== -# Wrong-Round Validation Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Wrong-Round Validation" -echo "========================================" -echo "" - -# Test: Wrong-round pr-resolve write is blocked -test_wrong_round_pr_resolve_blocked() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - # State says current_round is 2 - cat > "$loop_dir/state.md" << EOF ---- -current_round: 2 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Try to write to round-0 (wrong round) - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-resolve.md", "content": "wrong round"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "wrong round"; then - pass "T-ROUND-1: Wrong-round pr-resolve write is blocked" - else - fail "T-ROUND-1: Wrong-round pr-resolve should be blocked" "exit=2, wrong round" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: Correct-round pr-resolve write is allowed -test_correct_round_pr_resolve_allowed() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-01-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - # State says current_round is 2 - cat > "$loop_dir/state.md" << EOF ---- -current_round: 2 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Write to round-2 (correct round) - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-2-pr-resolve.md", "content": "correct round"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 0 ]]; then - pass "T-ROUND-2: Correct-round pr-resolve write is allowed" - else - fail "T-ROUND-2: Correct-round pr-resolve should be allowed" "exit=0" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: Wrong-round pr-resolve edit is blocked -test_wrong_round_pr_resolve_edit_blocked() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-02-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 3 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Try to edit round-1 (wrong round) - local hook_input='{"tool_name": "Edit", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-1-pr-resolve.md", "old_string": "x", "new_string": "y"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "wrong round"; then - pass "T-ROUND-3: Wrong-round pr-resolve edit is blocked" - else - fail "T-ROUND-3: Wrong-round pr-resolve edit should be blocked" "exit=2, wrong round" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Run wrong-round validation tests -test_wrong_round_pr_resolve_blocked -test_correct_round_pr_resolve_allowed -test_wrong_round_pr_resolve_edit_blocked - -# ======================================== -# Monitor PR Active Bots Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Monitor PR Active Bots Display" -echo "========================================" -echo "" - -# Test: Monitor parses YAML list for active_bots -test_monitor_yaml_list_parsing() { - local test_subdir="$TEST_DIR/monitor_yaml_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" yaml_list >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that active bots are displayed correctly (comma-separated) - if echo "$output" | grep -q "Active Bots:.*claude.*codex\|Active Bots:.*codex.*claude"; then - pass "T-MONITOR-1: Monitor parses and displays YAML list active_bots" - else - # Also accept claude,codex format - if echo "$output" | grep -q "Active Bots:.*claude,codex\|Active Bots:.*codex,claude"; then - pass "T-MONITOR-1: Monitor parses and displays YAML list active_bots" - else - fail "T-MONITOR-1: Monitor should display active bots from YAML list" "claude,codex" "got: $output" - fi - fi -} - -# Test: Monitor shows configured_bots separately -test_monitor_configured_bots() { - local 
test_subdir="$TEST_DIR/monitor_configured_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" configured >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that both configured and active bots are displayed - if echo "$output" | grep -q "Configured Bots:.*claude.*codex\|Configured Bots:.*codex.*claude\|Configured Bots:.*claude,codex\|Configured Bots:.*codex,claude"; then - pass "T-MONITOR-2: Monitor displays configured_bots" - else - fail "T-MONITOR-2: Monitor should display configured bots" "claude,codex" "got: $output" - fi -} - -# Test: Monitor shows 'none' when active_bots is empty -test_monitor_empty_active_bots() { - local test_subdir="$TEST_DIR/monitor_empty_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" empty >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that active bots shows 'none' - if echo "$output" | grep -q "Active Bots:.*none"; then - pass "T-MONITOR-3: Monitor shows 'none' for empty active_bots" - else - fail "T-MONITOR-3: Monitor should show 'none' for empty active_bots" "none" "got: $output" - fi -} - -# Run monitor tests -test_monitor_yaml_list_parsing -test_monitor_configured_bots -test_monitor_empty_active_bots - -} diff --git a/tests/test-pr-loop-lib.sh b/tests/test-pr-loop-lib.sh deleted file mode 100644 index a619e052..00000000 --- a/tests/test-pr-loop-lib.sh +++ 
/dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env bash -# -# Common library for PR loop tests -# -# Provides shared setup, helpers, and mock functions used by all test modules. -# -# Usage: source test-pr-loop-lib.sh -# - -# Determine script location -if [[ -z "${TEST_PR_LOOP_LIB_LOADED:-}" ]]; then - TEST_PR_LOOP_LIB_LOADED=1 - - # Get directories if not already set - SCRIPT_DIR="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}" - PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}" - - # Source test helpers if not already sourced - if ! declare -f setup_test_dir &>/dev/null; then - source "$SCRIPT_DIR/test-helpers.sh" - fi - - # ======================================== - # Mock Creation Functions - # ======================================== - - # Create mock scripts for gh CLI - create_mock_gh() { - local mock_dir="$1" - mkdir -p "$mock_dir" - - cat > "$mock_dir/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Mock gh CLI for testing - -case "$1" in - auth) - if [[ "$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "$2" == "view" ]]; then - if [[ "$3" == "--json" && "$4" == "owner" ]]; then - echo '{"login": "testowner"}' - elif [[ "$3" == "--json" && "$4" == "name" ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "$2" == "view" ]]; then - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - elif [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - elif [[ "$3" == "--json" && "$4" == "number" ]]; then - echo '{"number": 123}' - exit 0 - elif [[ "$3" == "--json" && "$4" == "state" ]] || [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - exit 0 - fi - ;; - api) - # Handle user endpoint - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty arrays for comment/review fetching - echo "[]" - exit 
0 - ;; -esac - -echo "Mock gh: unhandled command: $*" >&2 -exit 1 -MOCK_GH - chmod +x "$mock_dir/gh" - } - - # Create mock codex command - create_mock_codex() { - local mock_dir="$1" - - cat > "$mock_dir/codex" << 'MOCK_CODEX' -#!/usr/bin/env bash -# Mock codex CLI for testing -echo "Mock codex output" -exit 0 -MOCK_CODEX - chmod +x "$mock_dir/codex" - } - - # ======================================== - # Test Environment Setup - # ======================================== - - # Initialize test environment (call once at start of test run) - init_pr_loop_test_env() { - setup_test_dir - - # Create mock scripts directory and wire it into PATH - MOCK_BIN_DIR="$TEST_DIR/mock_bin" - mkdir -p "$MOCK_BIN_DIR" - export PATH="$MOCK_BIN_DIR:$PATH" - - # Initialize mock gh and codex in the PATH - create_mock_gh "$MOCK_BIN_DIR" - create_mock_codex "$MOCK_BIN_DIR" - - export MOCK_BIN_DIR - } - - # ======================================== - # Test Result Summary - # ======================================== - - # Print test summary and exit with appropriate code - print_test_summary() { - echo "" - echo "========================================" - echo "PR Loop Tests" - echo "========================================" - echo -e "Passed: \033[0;32m$TESTS_PASSED\033[0m" - echo -e "Failed: \033[0;31m$TESTS_FAILED\033[0m" - echo "" - - if [[ $TESTS_FAILED -gt 0 ]]; then - echo -e "\033[0;31mSome tests failed!\033[0m" - return 1 - else - echo -e "\033[0;32mAll tests passed!\033[0m" - return 0 - fi - } -fi diff --git a/tests/test-pr-loop-scripts.sh b/tests/test-pr-loop-scripts.sh deleted file mode 100644 index d77b9067..00000000 --- a/tests/test-pr-loop-scripts.sh +++ /dev/null @@ -1,410 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Script Tests -# -# Tests for script argument parsing and validation: -# - setup-pr-loop.sh -# - cancel-pr-loop.sh -# - fetch-pr-comments.sh -# - poll-pr-reviews.sh -# -# Usage: source test-pr-loop-scripts.sh && run_script_tests -# - -# 
======================================== -# setup-pr-loop.sh Tests -# ======================================== - -run_setup_tests() { - echo "" - echo "========================================" - echo "Testing setup-pr-loop.sh" - echo "========================================" - echo "" - - SETUP_SCRIPT="$PROJECT_ROOT/scripts/setup-pr-loop.sh" - - # Test: Help flag works - test_setup_help() { - local output - output=$("$SETUP_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "start-pr-loop"; then - pass "T-POS-1: --help displays usage information" - else - fail "T-POS-1: --help should display usage information" - fi - } - - # Test: Missing bot flag shows error - test_setup_no_bot_flag() { - local output - local exit_code - output=$("$SETUP_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "at least one bot flag"; then - pass "T-NEG-1: Missing bot flag shows error" - else - fail "T-NEG-1: Missing bot flag should show error" "exit code != 0 and error message" "exit=$exit_code, output=$output" - fi - } - - # Test: Invalid bot flag shows error - test_setup_invalid_bot() { - local output - local exit_code - output=$("$SETUP_SCRIPT" --invalid-bot 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "unknown option"; then - pass "T-NEG-2: Invalid bot flag shows error" - else - fail "T-NEG-2: Invalid bot flag should show error" "exit code != 0" "exit=$exit_code" - fi - } - - # Test: --claude flag is recognized - test_setup_claude_flag() { - # This will fail because no git repo, but we test that --claude is parsed - local output - output=$("$SETUP_SCRIPT" --claude 2>&1) || true - - # Should not complain about missing bot flag - if ! 
echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-2: --claude flag is recognized" - else - fail "T-POS-2: --claude flag should be recognized" - fi - } - - # Test: --codex flag is recognized - test_setup_codex_flag() { - local output - output=$("$SETUP_SCRIPT" --codex 2>&1) || true - - if ! echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-3: --codex flag is recognized" - else - fail "T-POS-3: --codex flag should be recognized" - fi - } - - # Test: Both bot flags work together - test_setup_both_bots() { - local output - output=$("$SETUP_SCRIPT" --claude --codex 2>&1) || true - - if ! echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-4: Both bot flags work together" - else - fail "T-POS-4: Both bot flags should work together" - fi - } - - # Test: --max argument is parsed - test_setup_max_arg() { - local output - output=$("$SETUP_SCRIPT" --claude --max 10 2>&1) || true - - # Should not complain about --max - if ! echo "$output" | grep -qi "max requires"; then - pass "T-POS-5: --max argument is parsed" - else - fail "T-POS-5: --max argument should be parsed" - fi - } - - # Test: --max with invalid value shows error - test_setup_max_invalid() { - local output - local exit_code - output=$("$SETUP_SCRIPT" --claude --max abc 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "must be.*integer"; then - pass "T-NEG-3: --max with invalid value shows error" - else - fail "T-NEG-3: --max with invalid value should show error" - fi - } - - # Test: --codex-model argument is parsed - test_setup_codex_model() { - local output - output=$("$SETUP_SCRIPT" --claude --codex-model gpt-4:high 2>&1) || true - - if ! 
echo "$output" | grep -qi "codex-model requires"; then - pass "T-POS-6: --codex-model argument is parsed" - else - fail "T-POS-6: --codex-model argument should be parsed" - fi - } - - # Test: --codex-timeout argument is parsed - test_setup_codex_timeout() { - local output - output=$("$SETUP_SCRIPT" --claude --codex-timeout 1800 2>&1) || true - - if ! echo "$output" | grep -qi "codex-timeout requires"; then - pass "T-POS-7: --codex-timeout argument is parsed" - else - fail "T-POS-7: --codex-timeout argument should be parsed" - fi - } - - # Run setup tests - test_setup_help - test_setup_no_bot_flag - test_setup_invalid_bot - test_setup_claude_flag - test_setup_codex_flag - test_setup_both_bots - test_setup_max_arg - test_setup_max_invalid - test_setup_codex_model - test_setup_codex_timeout -} - -# ======================================== -# cancel-pr-loop.sh Tests -# ======================================== - -run_cancel_tests() { - echo "" - echo "========================================" - echo "Testing cancel-pr-loop.sh" - echo "========================================" - echo "" - - CANCEL_SCRIPT="$PROJECT_ROOT/scripts/cancel-pr-loop.sh" - - # Test: Help flag works - test_cancel_help() { - local output - output=$("$CANCEL_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "cancel-pr-loop"; then - pass "T-POS-8: --help displays usage information" - else - fail "T-POS-8: --help should display usage information" - fi - } - - # Test: No loop returns NO_LOOP - test_cancel_no_loop() { - cd "$TEST_DIR" - # Export CLAUDE_PROJECT_DIR to ensure cancel script looks in test dir - export CLAUDE_PROJECT_DIR="$TEST_DIR" - local output - local exit_code - output=$("$CANCEL_SCRIPT" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - unset CLAUDE_PROJECT_DIR - - if [[ $exit_code -eq 1 ]] && echo "$output" | grep -q "NO_LOOP"; then - pass "T-NEG-4: No active loop returns NO_LOOP" - else - fail "T-NEG-4: No active loop should return NO_LOOP" "exit=1, NO_LOOP" "exit=$exit_code, output=$output" - fi - cd - > /dev/null - } - - # Test: Cancel works with active loop - test_cancel_active_loop() { - cd "$TEST_DIR" - # Export CLAUDE_PROJECT_DIR to ensure cancel script looks in test dir - export CLAUDE_PROJECT_DIR="$TEST_DIR" - - # Create mock loop directory - local timestamp="2026-01-18_12-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 ---- -EOF - - local output - local exit_code - output=$("$CANCEL_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - unset CLAUDE_PROJECT_DIR - - if [[ $exit_code -eq 0 ]] && echo "$output" | grep -q "CANCELLED"; then - if [[ -f "$loop_dir/cancel-state.md" ]] && [[ ! 
-f "$loop_dir/state.md" ]]; then - pass "T-POS-9: Cancel works and renames state file" - else - fail "T-POS-9: Cancel should rename state.md to cancel-state.md" - fi - else - fail "T-POS-9: Cancel should work with active loop" "exit=0, CANCELLED" "exit=$exit_code" - fi - - cd - > /dev/null - } - - # Run cancel tests - test_cancel_help - test_cancel_no_loop - test_cancel_active_loop -} - -# ======================================== -# fetch-pr-comments.sh Tests -# ======================================== - -run_fetch_tests() { - echo "" - echo "========================================" - echo "Testing fetch-pr-comments.sh" - echo "========================================" - echo "" - - FETCH_SCRIPT="$PROJECT_ROOT/scripts/fetch-pr-comments.sh" - - # Test: Help flag works - test_fetch_help() { - local output - output=$("$FETCH_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "fetch-pr-comments"; then - pass "T-POS-10: --help displays usage information" - else - fail "T-POS-10: --help should display usage information" - fi - } - - # Test: Missing PR number shows error - test_fetch_no_pr() { - local output - local exit_code - output=$("$FETCH_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "pr number.*required"; then - pass "T-NEG-5: Missing PR number shows error" - else - fail "T-NEG-5: Missing PR number should show error" - fi - } - - # Test: Missing output file shows error - test_fetch_no_output() { - local output - local exit_code - output=$("$FETCH_SCRIPT" 123 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "output file.*required"; then - pass "T-NEG-6: Missing output file shows error" - else - fail "T-NEG-6: Missing output file should show error" - fi - } - - # Test: Invalid PR number shows error - test_fetch_invalid_pr() { - local output - local exit_code - output=$("$FETCH_SCRIPT" abc /tmp/out.md 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "invalid pr number"; then - pass "T-NEG-7: Invalid PR number shows error" - else - fail "T-NEG-7: Invalid PR number should show error" - fi - } - - # Run fetch tests - test_fetch_help - test_fetch_no_pr - test_fetch_no_output - test_fetch_invalid_pr -} - -# ======================================== -# poll-pr-reviews.sh Tests -# ======================================== - -run_poll_tests() { - echo "" - echo "========================================" - echo "Testing poll-pr-reviews.sh" - echo "========================================" - echo "" - - POLL_SCRIPT="$PROJECT_ROOT/scripts/poll-pr-reviews.sh" - - # Test: Help flag works - test_poll_help() { - local output - output=$("$POLL_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "poll-pr-reviews"; then - pass "T-POS-11: --help displays usage information" - else - fail "T-POS-11: --help should display usage information" - fi - } - - # Test: Missing PR number shows error - test_poll_no_pr() { - local output - local exit_code - output=$("$POLL_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "pr number.*required"; then - pass "T-NEG-8: Missing PR number shows error" - else - fail "T-NEG-8: Missing PR number should show error" - fi - } - - # Test: Missing --after shows error - test_poll_no_after() { - local output - local exit_code - output=$("$POLL_SCRIPT" 123 --bots claude 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "after.*required"; then - pass "T-NEG-9: Missing --after shows error" - else - fail "T-NEG-9: Missing --after should show error" - fi - } - - # Test: Missing --bots shows error - test_poll_no_bots() { - local output - local exit_code - output=$("$POLL_SCRIPT" 123 --after 2026-01-18T00:00:00Z 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "bots.*required"; then - pass "T-NEG-10: Missing --bots shows error" - else - fail "T-NEG-10: Missing --bots should show error" - fi - } - - # Run poll tests - test_poll_help - test_poll_no_pr - test_poll_no_after - test_poll_no_bots -} - -# ======================================== -# Main Entry Point -# ======================================== - -run_script_tests() { - run_setup_tests - run_cancel_tests - run_fetch_tests - run_poll_tests -} diff --git a/tests/test-pr-loop-stophook.sh b/tests/test-pr-loop-stophook.sh deleted file mode 100644 index a73f8a4b..00000000 --- a/tests/test-pr-loop-stophook.sh +++ /dev/null @@ -1,1782 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Stop Hook Tests -# -# Tests for the stop hook functionality: -# - Force push detection -# - Trigger validation -# - Bot timeout handling -# - State file management -# - Dynamic startup_case updates -# -# Usage: source test-pr-loop-stophook.sh && run_stophook_tests -# - -run_stophook_tests() { -# ======================================== -# Stop-Hook Integration Tests -# ======================================== - -# Test: Force push trigger validation - old triggers rejected after force push -test_stophook_force_push_rejects_old_trigger() { - local test_subdir="$TEST_DIR/stophook_force_push_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with latest_commit_at set to AFTER the old trigger comment - # This simulates: force push happened after the old trigger was posted - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 4 
-latest_commit_sha: newsha123 -latest_commit_at: 2026-01-18T14:00:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns OLD trigger comment (BEFORE latest_commit_at) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Check if --jq is in arguments (for transformed format) -HAS_JQ=false -for arg in "$@"; do - if [[ "$arg" == "--jq" || "$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Return old trigger comment from 12:00 (BEFORE latest_commit_at of 14:00) - if [[ "$HAS_JQ" == "true" ]]; then - # With --jq --paginate, output one transformed object per line - echo '{"id": 1, "author": "testuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}' - else - # Raw GitHub API format - echo '[{"id": 1, "user": {"login": "testuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "newsha123" # Match state file - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; # Pretend no force push in this test -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook and capture output - local hook_output - hook_output=$(echo '{}' | 
"$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # The old trigger should be rejected because it's before latest_commit_at - # Stop hook should block requiring a new trigger - if echo "$hook_output" | grep -qi "trigger\|comment @\|re-trigger\|no trigger"; then - pass "T-STOPHOOK-1: Force push validation rejects old trigger comment" - else - fail "T-STOPHOOK-1: Should reject old trigger after force push" "block/require trigger" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 7 Case 1 exception - no trigger required for startup_case=1, round=0 -test_stophook_case1_no_trigger_required() { - local test_subdir="$TEST_DIR/stophook_case1_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with startup_case=1 and round=0 - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns no trigger comments, but has codex +1 - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/reactions"* ]]; then - # Return codex +1 reaction (triggers approval) - echo '[{"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T10:05:00Z"}]' - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[]' # No comments - exit 0 - 
fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Case 1 exception: should NOT block for missing trigger - if echo "$hook_stderr" | grep -q "trigger not required\|Case 1\|startup_case=1"; then - pass "T-STOPHOOK-2: Case 1 exception - no trigger required" - else - # Alternative: check that it didn't block - if ! echo "$hook_stderr" | grep -qi "block.*trigger\|missing.*trigger\|comment @"; then - pass "T-STOPHOOK-2: Case 1 exception - no trigger required (no block)" - else - fail "T-STOPHOOK-2: Case 1 should not require trigger" "no block" "got: $hook_stderr" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 9 - APPROVE creates approve-state.md -test_stophook_approve_creates_state() { - local test_subdir="$TEST_DIR/stophook_approve_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with empty active_bots (YAML list format, no items) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T11:00:00Z -trigger_comment_id: 123 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - # Create resolve 
file (required by stop hook) - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - export CLAUDE_PROJECT_DIR="$test_subdir" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export PATH="$mock_bin:$PATH" - - # Run stop hook - with empty active_bots, it should approve - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Check for approve-state.md creation - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-3: APPROVE creates approve-state.md" - else - # Alternative: check output for approval message - if echo "$hook_output" | grep -qi "approved\|complete"; then - pass "T-STOPHOOK-3: APPROVE creates approve-state.md (via message)" - else - fail "T-STOPHOOK-3: Should create approve-state.md" "approve-state.md exists" "not found" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Dynamic startup_case update when new comments arrive -test_stophook_dynamic_startup_case() { - local test_subdir="$TEST_DIR/stophook_dynamic_case_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Start with startup_case=1 (no comments) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 
-trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns bot comments (simulating comments arriving) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return bot comments (claude and codex have commented) - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id":1,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:05:00Z","body":"Found issue"},{"id":2,"user":{"login":"chatgpt-codex-connector[bot]"},"created_at":"2026-01-18T10:06:00Z","body":"Also found issue"}]' - exit 0 - fi - if [[ "$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - if [[ "$2" == *"/reactions"* ]]; then - echo '[]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T09:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T09:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with timeout (it may poll, so limit to 5 seconds) - timeout 5 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" >/dev/null 
2>&1 || true - - # Check if startup_case was updated in state file - local new_case - new_case=$(grep "^startup_case:" "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" 2>/dev/null | sed 's/startup_case: *//' | tr -d ' ' || true) - - # With both bots commented and no new commits, should be Case 3 - if [[ "$new_case" == "3" ]]; then - pass "T-STOPHOOK-4: Dynamic startup_case updated to 3 (all commented, no new commits)" - elif [[ -n "$new_case" && "$new_case" != "1" ]]; then - pass "T-STOPHOOK-4: Dynamic startup_case updated from 1 to $new_case" - else - fail "T-STOPHOOK-4: startup_case should update dynamically" "case 3" "got: $new_case" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 6 - unpushed commits block exit -test_stophook_step6_unpushed_commits() { - local test_subdir="$TEST_DIR/stophook_step6_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - # Mock git that reports unpushed commits - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo 
"" # Clean working directory - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch...origin/test-branch [ahead 2]" # 2 unpushed commits - fi - ;; - branch) - echo "test-branch" - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should block with unpushed commits message - if echo "$hook_output" | grep -qi "unpushed\|ahead\|push.*commit"; then - pass "T-STOPHOOK-5: Step 6 blocks on unpushed commits" - else - fail "T-STOPHOOK-5: Step 6 should block on unpushed commits" "unpushed/ahead message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 6.5 - force push detection with actual history rewrite simulation -test_stophook_step65_force_push_detection() { - local test_subdir="$TEST_DIR/stophook_step65_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with old commit SHA - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T10:30:00Z -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: oldsha123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then 
- echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - # Mock git that simulates force push: old commit is NOT ancestor of current HEAD - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "newsha456" # Different from oldsha123 in state - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) - # Simulate force push: old commit is NOT an ancestor - # --is-ancestor exits 1 when not ancestor - exit 1 - ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should detect force push and block - if echo "$hook_output" | grep -qi "force.*push\|history.*rewrite\|re-trigger"; then - pass "T-STOPHOOK-6: Step 6.5 detects force push (history rewrite)" - else - fail "T-STOPHOOK-6: Step 6.5 should detect force push" "force push message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 7 - missing trigger comment blocks (Case 4/5) -test_stophook_step7_missing_trigger() { - local test_subdir="$TEST_DIR/stophook_step7_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with startup_case=4 (requires trigger) but no trigger - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 
2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 4 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T12:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns no trigger comments - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[]' # No comments - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should block with missing trigger message - if echo "$hook_output" | grep -qi "trigger\|@.*mention\|comment"; then - pass "T-STOPHOOK-7: Step 7 blocks on missing trigger (Case 4)" - else - fail "T-STOPHOOK-7: Step 7 should block on missing trigger" "trigger/mention message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Bot timeout 
auto-removes bot from active_bots -test_stophook_bot_timeout_auto_remove() { - local test_subdir="$TEST_DIR/stophook_timeout_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with short poll_timeout (2 seconds) to test timeout behavior - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T10:30:00Z -trigger_comment_id: 999 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns NO bot comments (simulates bot not responding) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty for all comment/review queries - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) 
exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with short timeout - it should time out and auto-remove bots - local hook_output - hook_output=$(timeout 10 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Should either mention timeout or create approve-state (if all bots timed out) - if echo "$hook_output" | grep -qi "timeout\|timed out\|auto-remove\|approved"; then - pass "T-STOPHOOK-8: Bot timeout handling" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-8: Bot timeout created approve-state.md" - else - fail "T-STOPHOOK-8: Bot timeout should trigger auto-remove" "timeout/approved message" "got: $hook_output" - fi - - # VERIFICATION: Check that active_bots was actually updated (removed the bot) - # After timeout, either: - # 1. approve-state.md exists with empty active_bots (all bots timed out) - # 2. 
state.md has the timed-out bot removed from active_bots - local state_file="" - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - fi - - # VERIFICATION: Check that approve-state.md was created with empty active_bots - local approve_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - if [[ -f "$approve_file" ]]; then - pass "T-STOPHOOK-8a: approve-state.md created - bot timeout led to loop completion" - # Verify active_bots is empty (not containing 'codex') - local active_bots_line - active_bots_line=$(grep "^active_bots:" "$approve_file" 2>/dev/null || true) - # After the line "active_bots:", check if there are any bot entries - local next_line_has_bot - next_line_has_bot=$(sed -n '/^active_bots:/,/^[a-z_]*:/p' "$approve_file" | grep -E '^\s*-\s*\w' || true) - if [[ -z "$next_line_has_bot" ]]; then - pass "T-STOPHOOK-8b: active_bots is empty after timeout" - else - fail "T-STOPHOOK-8b: active_bots should be empty after timeout" "no bots listed" "got: $next_line_has_bot" - fi - else - fail "T-STOPHOOK-8a: approve-state.md should exist after bot timeout" "approve-state.md exists" "file not found" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Codex +1 detection removes codex from active_bots -test_stophook_codex_thumbsup_approval() { - local test_subdir="$TEST_DIR/stophook_thumbsup_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with startup_case=1 (required for +1 check) and only codex as active bot - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: 
gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns +1 reaction from codex - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return +1 reaction for PR reactions query - if [[ "$2" == *"/issues/"*"/reactions"* ]]; then - echo '[{"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T10:05:00Z"}]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should detect +1 and create approve-state.md (since codex is only bot) - if echo "$hook_output" | grep -qi 
"+1\|thumbsup\|approved"; then - pass "T-STOPHOOK-9: Codex +1 detection" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-9: Codex +1 created approve-state.md" - else - fail "T-STOPHOOK-9: Codex +1 should be detected" "+1/approved message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Claude eyes timeout blocks exit -test_stophook_claude_eyes_timeout() { - local test_subdir="$TEST_DIR/stophook_eyes_timeout_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with claude configured and trigger required (round > 0) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T11:00:00Z -trigger_comment_id: 12345 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns NO eyes reaction (simulates claude bot not configured) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Check if --jq is in arguments (for transformed format) -HAS_JQ=false -for arg in "$@"; do - if [[ "$arg" == "--jq" || "$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty reactions - no eyes - if [[ "$2" == *"/reactions"* ]]; then - echo "[]" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Return trigger comment - if [[ "$HAS_JQ" == "true" ]]; then - # With --jq --paginate, output one transformed object per line - 
echo '{"id": 12345, "author": "testuser", "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}' - else - # Raw GitHub API format - echo '[{"id": 12345, "user": {"login": "testuser"}, "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run with timeout since eyes check has 3x5s retry (15s total) - local hook_output - hook_output=$(timeout 20 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Should block with eyes timeout message - if echo "$hook_output" | grep -qi "eyes\|not responding\|timeout\|bot.*configured"; then - pass "T-STOPHOOK-10: Claude eyes timeout blocks exit" - else - fail "T-STOPHOOK-10: Claude eyes timeout should block" "eyes/timeout message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Dynamic startup_case update when comments arrive -test_stophook_dynamic_startup_case_update() { - local test_subdir="$TEST_DIR/stophook_dynamic_case_test2" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # 
Use dynamic timestamps to ensure polling doesn't time out immediately - # Timeline: commit -> trigger -> comment (all recent, all within poll_timeout) - local trigger_ts commit_ts comment_ts - # Trigger was 10 seconds ago - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - # Commit was 60 seconds ago (before trigger) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - # Comment arrived 5 seconds ago (after trigger, after commit -> case 3) - comment_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - - # Start with startup_case=1 (no comments initially), then comments arrive - # Provide a trigger comment to proceed past timeout checks - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns bot comments (simulating comments arriving) - # IMPORTANT: poll-pr-reviews.sh expects RAW GitHub API format (with .user.login) - # check-pr-reviewer-status.sh uses --jq so needs transformed format - # Use COMMENT_TS environment variable for dynamic timestamp - cat > "$mock_bin/gh" << MOCK_GH -#!/usr/bin/env bash -# Dynamic comment timestamp from test setup -COMMENT_TS="$comment_ts" -COMMIT_TS="$commit_ts" - -# Check if --jq is in arguments and what type of jq expression -HAS_JQ=false -JQ_RETURNS_ARRAY=false 
-ARGS=("\$@") -for ((i=0; i<\${#ARGS[@]}; i++)); do - if [[ "\${ARGS[i]}" == "--jq" || "\${ARGS[i]}" == "-q" ]]; then - HAS_JQ=true - # Check next argument for jq expression starting with [ - next_idx=\$((i + 1)) - if [[ \$next_idx -lt \${#ARGS[@]} ]]; then - next_arg="\${ARGS[next_idx]}" - if [[ "\$next_arg" == "["* ]]; then - JQ_RETURNS_ARRAY=true - fi - fi - fi -done - -case "\$1" in - repo) - # check-pr-reviewer-status.sh needs repo owner/name with jq transformation - if [[ "\$*" == *"--json owner,name"* ]] || [[ "\$*" == *"--json owner"* && "\$*" == *"--json name"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # jq '.owner.login + "/" + .name' returns "owner/repo" - echo "testowner/testrepo" - else - echo '{"owner": {"login": "testowner"}, "name": "testrepo"}' - fi - exit 0 - fi - if [[ "\$*" == *"--json parent"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # jq '.parent.owner.login + "/" + .parent.name' returns empty for non-fork - echo "" - else - echo '{"parent": null}' - fi - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return codex comment - format depends on whether --jq is used and its pattern - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - if [[ "\$JQ_RETURNS_ARRAY" == "true" ]]; then - # check-pr-reviewer-status.sh uses '[.[] | {...}]' - returns array - echo "[{\"author\":\"chatgpt-codex-connector[bot]\",\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}]" - else - # stop hook uses '.[] | {...}' then 'jq -s' - returns individual objects - echo "{\"id\":1001,\"author\":\"chatgpt-codex-connector[bot]\",\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}" - fi - else - # Raw GitHub API format for poll-pr-reviews.sh - echo "[{\"id\":1001,\"user\":{\"login\":\"chatgpt-codex-connector[bot]\",\"type\":\"Bot\"},\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}]" - fi - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - echo 
'[]' - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/reactions"* ]]; then - echo '[]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - # PR existence check: gh pr view --repo ... --json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '{"number": 123}' - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"headRefOid"* ]]; then - # For check-pr-reviewer-status.sh: returns jq-processed format - # {sha: .headRefOid, date: (.commits | last | .committedDate)} - echo "{\"sha\":\"abc123\",\"date\":\"\$COMMIT_TS\"}" - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used (stop hook commit fetch) - echo "\$COMMIT_TS" - exit 0 - fi - if [[ "\$*" == *"commits"* ]]; then - # Commit before the comment - echo "{\"commits\":[{\"committedDate\":\"\$COMMIT_TS\"}]}" - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with timeout and capture output for debugging - local hook_output - hook_output=$(timeout 15 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" 2>&1) || true - - # Check if startup_case was updated in state file (or approve-state.md if all bots approved/timed out) - local new_case state_file - if [[ -f 
"$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - else - state_file="" - fi - - if [[ -n "$state_file" ]]; then - new_case=$(grep "^startup_case:" "$state_file" 2>/dev/null | sed 's/startup_case: *//' | tr -d ' ' || true) - else - new_case="" - fi - - # Verify startup_case is present in the updated state file (confirms re-evaluation code path ran) - if [[ -n "$new_case" ]]; then - pass "T-STOPHOOK-11: Hook completes with startup_case in state" - else - fail "T-STOPHOOK-11: startup_case should be preserved in state" "startup_case present" "got: empty/missing" - fi - - # VERIFICATION: Assert startup_case changed from initial value (1) to expected value - # Mock setup: codex comment at 10:05:00Z, commit at 09:00:00Z (before comment) - # Expected: Case 3 (all reviewers commented, no new commits after) - if [[ -n "$new_case" && "$new_case" != "1" ]]; then - pass "T-STOPHOOK-11a: startup_case changed from 1 to $new_case" - elif [[ -n "$new_case" && "$new_case" == "1" ]]; then - # Debug: check if stop hook re-evaluated startup_case - if echo "$hook_output" | grep -qi "Startup case changed"; then - # Re-evaluation ran but case didn't change in state file - state write issue - fail "T-STOPHOOK-11a: startup_case changed in hook but not persisted" "!= 1" "case_change logged but state=1" - elif echo "$hook_output" | grep -qi "check-pr-reviewer-status\|NEW_REVIEWER_STATUS"; then - # Re-evaluation script was called - fail "T-STOPHOOK-11a: startup_case check ran but returned 1" "!= 1" "got: 1" - else - # Re-evaluation didn't run - likely exited early - local exit_reason - exit_reason=$(echo "$hook_output" | grep -i "exit\|block\|timeout" | head -3 || echo "unknown") - fail "T-STOPHOOK-11a: startup_case 
re-evaluation not reached" "!= 1" "got: 1, exit: $exit_reason" - fi - else - fail "T-STOPHOOK-11a: startup_case should be present and changed" "number != 1" "got: empty" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Fork PR support - stop hook resolves base repo from parent -test_stophook_fork_pr_base_repo_resolution() { - local test_subdir="$TEST_DIR/stophook_fork_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 456 -start_branch: test-branch -configured_bots: - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that simulates a fork scenario: - # - Current repo (fork) doesn't have PR 456 - # - Parent repo (upstream) has PR 456 - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Track which repo we're querying -FORK_REPO="forkuser/forkrepo" -UPSTREAM_REPO="upstreamowner/upstreamrepo" - -case "$1" in - repo) - if [[ "$*" == *"--json owner,name"* ]]; then - # Current repo is the fork - echo "forkuser/forkrepo" - exit 0 - fi - if [[ "$*" == *"--json parent"* ]]; then - # Return parent (upstream) repo - echo "upstreamowner/upstreamrepo" - exit 0 - fi - ;; - pr) - # Check which --repo was specified - if [[ "$*" == *"--repo forkuser/forkrepo"* ]]; then - # Fork doesn't have PR 456 - return empty/error - exit 1 - fi - if [[ "$*" == *"--repo upstreamowner/upstreamrepo"* ]]; then - # Upstream has PR 456 - if [[ "$*" == *"number"* ]] && [[ "$*" != *"commits"* ]]; then - echo 
'{"number": 456}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - echo "2026-01-18T10:00:00Z" - exit 0 - fi - fi - # Default: try to handle without --repo (should fail for forks) - if [[ "$*" != *"--repo"* ]]; then - exit 1 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook - should resolve PR from parent repo - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should not fail with "PR not found" because it should have found it in parent repo - # And since active_bots is empty, it should approve - if echo "$hook_output" | grep -qi "approved\|complete"; then - pass "T-STOPHOOK-12: Fork PR support - resolved PR from parent repo" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-12: Fork PR support - created approve-state.md" - else - # Check if it at least didn't fail with "PR not found" - if ! 
echo "$hook_output" | grep -qi "pr.*not.*found\|no.*pull.*request"; then - pass "T-STOPHOOK-12: Fork PR support - did not fail on PR lookup" - else - fail "T-STOPHOOK-12: Fork PR should resolve from parent" "success" "got: $hook_output" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Goal tracker - resolved count stays 0 when some bots have issues -test_stophook_goal_tracker_mixed_approval() { - local test_subdir="$TEST_DIR/stophook_goal_tracker_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps to ensure polling doesn't time out immediately - # Timeline: commit -> trigger -> bot comments (all recent, within poll_timeout) - local trigger_ts commit_ts claude_ts codex_ts - # Trigger was 10 seconds ago - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - # Commit was 60 seconds ago (before trigger) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - # Claude comment arrived 5 seconds ago (after trigger) - claude_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - # Codex comment arrived 4 seconds ago (after trigger) - codex_ts=$(date -u -d "-4 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-4S +%Y-%m-%dT%H:%M:%SZ) - - # State with two bots configured - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - 
# Create initial goal tracker - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << EOF -# PR Loop Goal Tracker - -## Stats -- Issues Found: 0 -- Issues Resolved: 0 - -## Log -| Round | Timestamp | Event | -|-------|-----------|-------| -| 0 | $commit_ts | Loop started | -EOF - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns: - # - claude: APPROVE (LGTM) - # - codex: ISSUES (has issues) - cat > "$mock_bin/gh" << MOCK_GH -#!/usr/bin/env bash -# Dynamic timestamps from test setup -CLAUDE_TS="$claude_ts" -CODEX_TS="$codex_ts" -COMMIT_TS="$commit_ts" - -HAS_JQ=false -for arg in "\$@"; do - if [[ "\$arg" == "--jq" || "\$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "\$1" in - repo) - if [[ "\$*" == *"--json owner,name"* ]]; then - echo "testowner/testrepo" - exit 0 - fi - if [[ "\$*" == *"--json parent"* ]]; then - echo "" - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return comments from both bots - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # Claude approves, Codex has issues - echo "{\"id\": 1, \"author\": \"claude[bot]\", \"created_at\": \"\$CLAUDE_TS\", \"body\": \"LGTM! No issues found.\"}" - echo "{\"id\": 2, \"author\": \"chatgpt-codex-connector[bot]\", \"created_at\": \"\$CODEX_TS\", \"body\": \"Found 2 issues that need fixing.\"}" - else - echo "[{\"id\": 1, \"user\": {\"login\": \"claude[bot]\"}, \"created_at\": \"\$CLAUDE_TS\", \"body\": \"LGTM! No issues found.\"},{\"id\": 2, \"user\": {\"login\": \"chatgpt-codex-connector[bot]\"}, \"created_at\": \"\$CODEX_TS\", \"body\": \"Found 2 issues that need fixing.\"}]" - fi - exit 0 - fi - if [[ "\$2" == *"/reactions"* ]]; then - # Return eyes for claude (no need for this test but keep consistent) - echo "[]" - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '{"number": 123}' - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - echo "\$COMMIT_TS" - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Mock codex that outputs mixed approval - cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/usr/bin/env bash -# Mock codex output: claude approves, codex has issues -cat << 'CODEX_OUTPUT' -# PR Review Validation - -### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | No issues found | -| codex | ISSUES | Found 2 issues that need fixing | - -### Issues Found (if any) -1. Issue from codex: Missing error handling -2. 
Issue from codex: Needs tests - -### Approved Bots (to remove from active_bots) -- claude - -### Final Recommendation -ISSUES_REMAINING -CODEX_OUTPUT -MOCK_CODEX - chmod +x "$mock_bin/codex" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook - local hook_output - hook_output=$(timeout 30 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Verify that ISSUES_RESOLVED_COUNT is 0, not inflated to ISSUES_FOUND_COUNT - # The goal tracker should show issues found > 0 but resolved = 0 - # (because codex still has issues, even though claude approved) - - # Check the feedback file or check file for the correct issue counts - local check_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-check.md" - if [[ -f "$check_file" ]]; then - # Check that issues were found - if grep -q "Issues Found\|ISSUES" "$check_file" 2>/dev/null; then - pass "T-STOPHOOK-13: Goal tracker correctly identifies issues" - else - fail "T-STOPHOOK-13: Check file should contain issues" "issues listed" "not found" - fi - else - # Check file may not exist if polling didn't complete - # Check output instead - if echo "$hook_output" | grep -qi "issues.*remaining\|ISSUES_REMAINING"; then - pass "T-STOPHOOK-13: Goal tracker correctly identifies issues (via output)" - else - fail "T-STOPHOOK-13: Should detect issues remaining" "issues_remaining" "got: $hook_output" - fi - fi - - # VERIFICATION: The key fix - resolved count should NOT be inflated - # Since we can't directly check ISSUES_RESOLVED_COUNT variable, verify the behavior: - # - claude approved (removed from active_bots) - # - codex has issues (stays in active_bots) - # - loop should continue (not complete) because codex still has issues - - if [[ ! 
-f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-13a: Loop continues with mixed approval (not prematurely completed)" - else - fail "T-STOPHOOK-13a: Loop should not complete with mixed approval" "no approve-state.md" "approve-state.md exists" - fi - - # Check that claude was removed from active_bots but codex remains - local state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - if [[ -f "$state_file" ]]; then - local active_bots_content - active_bots_content=$(sed -n '/^active_bots:/,/^[a-z_]*:/p' "$state_file" | grep -E '^\s*-' || true) - - if echo "$active_bots_content" | grep -q "codex"; then - pass "T-STOPHOOK-13b: Codex remains in active_bots (has issues)" - else - fail "T-STOPHOOK-13b: Codex should remain in active_bots" "codex in list" "got: $active_bots_content" - fi - - if ! echo "$active_bots_content" | grep -q "claude"; then - pass "T-STOPHOOK-13c: Claude removed from active_bots (approved)" - else - fail "T-STOPHOOK-13c: Claude should be removed from active_bots" "no claude" "got: $active_bots_content" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Run stop-hook integration tests -test_stophook_force_push_rejects_old_trigger -test_stophook_case1_no_trigger_required -test_stophook_approve_creates_state -test_stophook_step6_unpushed_commits -test_stophook_step65_force_push_detection -test_stophook_step7_missing_trigger -test_stophook_bot_timeout_auto_remove -test_stophook_codex_thumbsup_approval -test_stophook_claude_eyes_timeout -test_stophook_dynamic_startup_case_update -test_stophook_fork_pr_base_repo_resolution -test_stophook_goal_tracker_mixed_approval - -} diff --git a/tests/test-pr-loop-system.sh b/tests/test-pr-loop-system.sh deleted file mode 100755 index 05cf3b87..00000000 --- a/tests/test-pr-loop-system.sh +++ /dev/null @@ -1,1904 +0,0 @@ -#!/usr/bin/env bash -# -# Test runner for PR loop system -# -# Runs all tests in the tests/ directory using the mock gh CLI -# -# 
Usage: -# ./tests/run-tests.sh [test-name] -# -# Environment: -# TEST_VERBOSE=1 - Show verbose output - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -# Test configuration -TESTS_DIR="$SCRIPT_DIR" -MOCKS_DIR="$TESTS_DIR/mocks" -FIXTURES_DIR="$TESTS_DIR/fixtures" -TEST_VERBOSE="${TEST_VERBOSE:-0}" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -NC='\033[0m' # No Color - -# Counters -TESTS_RUN=0 -TESTS_PASSED=0 -TESTS_FAILED=0 - -# Test helper functions -log_test() { - echo -e "${YELLOW}[TEST]${NC} $1" -} - -log_pass() { - echo -e "${GREEN}[PASS]${NC} $1" - TESTS_PASSED=$((TESTS_PASSED + 1)) -} - -log_fail() { - echo -e "${RED}[FAIL]${NC} $1" - TESTS_FAILED=$((TESTS_FAILED + 1)) -} - -# Setup test environment -setup_test_env() { - # Add mocks to PATH - export PATH="$MOCKS_DIR:$PATH" - export MOCK_GH_FIXTURES_DIR="$FIXTURES_DIR" - - # Create temp directory for tests - export TEST_TEMP_DIR=$(mktemp -d) - export CLAUDE_PROJECT_DIR="$TEST_TEMP_DIR" - - # Initialize git repo for tests - ( - cd "$TEST_TEMP_DIR" - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - git config commit.gpgsign false - echo "# Test" > README.md - git add README.md - git commit -q -m "Initial commit" - ) >/dev/null 2>&1 -} - -# Cleanup test environment -cleanup_test_env() { - if [[ -n "${TEST_TEMP_DIR:-}" && -d "$TEST_TEMP_DIR" ]]; then - rm -rf "$TEST_TEMP_DIR" - fi -} - -# Run a test function -run_test() { - local test_name="$1" - local test_func="$2" - - TESTS_RUN=$((TESTS_RUN + 1)) - log_test "$test_name" - - setup_test_env - - # Run test in subshell to isolate failures - local result=0 - ( - cd "$TEST_TEMP_DIR" - $test_func - ) && result=0 || result=$? 
- - if [[ $result -eq 0 ]]; then - log_pass "$test_name" - else - log_fail "$test_name (exit code: $result)" - fi - - cleanup_test_env -} - -# ======================================== -# Test: Mutual Exclusion -# ======================================== - -test_mutual_exclusion_rlcr_blocks_pr() { - # Create an active RLCR loop - mkdir -p .humanize/rlcr/2026-01-18_12-00-00 - echo "--- -current_round: 1 -max_iterations: 10 ----" > .humanize/rlcr/2026-01-18_12-00-00/state.md - - # Try to start a PR loop - should fail - export MOCK_GH_PR_NUMBER=123 - export MOCK_GH_PR_STATE="OPEN" - - local result - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --codex 2>&1) && return 1 || true - - # Should contain error about RLCR loop active - echo "$result" | grep -q "RLCR loop is already active" || return 1 -} - -test_mutual_exclusion_pr_blocks_rlcr() { - # Create an active PR loop - mkdir -p .humanize/pr-loop/2026-01-18_12-00-00 - echo "--- -current_round: 0 -max_iterations: 42 -pr_number: 123 ----" > .humanize/pr-loop/2026-01-18_12-00-00/state.md - - # Try to start an RLCR loop - should fail - echo "# Test Plan" > test-plan.md - - local result - result=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" test-plan.md 2>&1) && return 1 || true - - # Should contain error about PR loop active - echo "$result" | grep -q "PR loop is already active" || return 1 -} - -# ======================================== -# Test: Check PR Reviewer Status -# ======================================== - -test_reviewer_status_case1_no_comments() { - # Fixture with no bot comments - must clear ALL comment sources - echo "[]" > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo "[]" > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 1 - local test_passed=true - echo "$result" | jq -e '.case == 1' || test_passed=false - - # Restore fixtures - echo 
'[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - $test_passed -} - -test_reviewer_status_case2_partial_comments() { - # Only claude has commented - must clear codex comments too - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo "[]" > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 2 (partial) - local test_passed=true - echo "$result" | jq -e '.case == 2' || test_passed=false - echo "$result" | jq -e '.reviewers_missing | contains(["codex"])' || test_passed=false - - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! 
Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - $test_passed -} - -# ======================================== -# Test: Codex +1 Detection -# ======================================== - -test_codex_thumbsup_detected() { - local result - result=$("$PROJECT_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup 123) - - # Should find the +1 reaction - echo "$result" | jq -e '.content == "+1"' || return 1 -} - -test_codex_thumbsup_with_after_filter() { - # Test --after filter - reaction is at 11:10:00Z, we filter for after 12:00:00Z - # So no reaction should be found - local result - if "$PROJECT_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup 123 --after "2026-01-18T12:00:00Z" 2>/dev/null; then - # Should NOT succeed - reaction is before the filter time - return 1 - fi - # Correctly failed - reaction is before filter time - return 0 -} - -# ======================================== -# Test: Claude Eyes Detection -# ======================================== - -test_claude_eyes_detected() { - # Use delay 0 and retry 1 for fast test - local result - result=$("$PROJECT_ROOT/scripts/check-bot-reactions.sh" claude-eyes 12345 --retry 1 --delay 0) - - # Should find the eyes reaction - echo "$result" | jq -e '.content == "eyes"' || return 1 -} - -# ======================================== -# Test: PR Reviews Detection (PR submissions) -# ======================================== - -test_reviewer_status_includes_pr_reviews() { - # Set up fixture where codex has APPROVED via PR review (not comment) - echo "[]" > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! 
Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "codex") - - # Codex should be in reviewers_commented because of PR review - local test_passed=true - echo "$result" | jq -e '.reviewers_commented | contains(["codex"])' || test_passed=false - - $test_passed -} - -# ======================================== -# Test: Phase Detection -# ======================================== - -test_phase_detection_approved() { - # Source monitor-common.sh (located in scripts/lib/) - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with approve-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/approve-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "approved" ]] || return 1 -} - -test_phase_detection_waiting_initial() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with state.md at round 0 and startup_case 1 - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 0 -startup_case: 1 ---- -EOF - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "waiting_initial_review" ]] || return 1 -} - -test_phase_detection_waiting_reviewer() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with state.md at round 1 - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 ---- -EOF - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "waiting_reviewer" ]] || return 1 -} - -# ======================================== 
-# Test: Goal Tracker Parsing -# ======================================== - -test_goal_tracker_parsing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake goal tracker file - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# Goal Tracker - -### Ultimate Goal -Get all bots to approve the PR. - -### Acceptance Criteria - -| AC | Description | -|----|-------------| -| AC-1 | Bot claude approves | -| AC-2 | Bot codex approves | - -### Completed and Verified - -| AC | Description | -|----|-------------| -| AC-1 | Completed | - -#### Active Tasks - -| Task | Description | Status | -|------|-------------|--------| -| Fix bug | Fix the bug | pending | -| Add test | Add a test | completed | - -### Explicitly Deferred - -| Task | Description | -|------|-------------| - -### Open Issues - -| Issue | Description | -|-------|-------------| - -EOF - - local result - result=$(parse_goal_tracker "$tracker_file") - - # Should return: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary - # Expected: 2|1|1|0|0|0|Get all bots to approve the PR. 
- - local total_acs completed_acs active_tasks - IFS='|' read -r total_acs completed_acs active_tasks _ _ _ _ <<< "$result" - - [[ "$total_acs" == "2" ]] || { echo "Expected total_acs=2, got $total_acs"; return 1; } - [[ "$completed_acs" == "1" ]] || { echo "Expected completed_acs=1, got $completed_acs"; return 1; } - [[ "$active_tasks" == "1" ]] || { echo "Expected active_tasks=1, got $active_tasks"; return 1; } -} - -# ======================================== -# Test: PR Goal Tracker Parsing -# ======================================== - -test_pr_goal_tracker_parsing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake PR goal tracker file - local tracker_file="$TEST_TEMP_DIR/pr-goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Goal Tracker - -## Total Statistics - -- Total Issues Found: 5 -- Total Issues Resolved: 3 -- Remaining: 2 - -## Issue Summary - -| ID | Reviewer | Round | Status | Description | -|----|----------|-------|--------|-------------| -| 1 | Claude | 0 | resolved | Issue one | -| 2 | Claude | 0 | resolved | Issue two | -| 3 | Codex | 1 | open | Issue three | -| 4 | Codex | 1 | resolved | Issue four | -| 5 | Claude | 2 | open | Issue five | - -EOF - - local result - result=$(humanize_parse_pr_goal_tracker "$tracker_file") - - # Should return: total_issues|resolved_issues|remaining_issues|last_reviewer - # Expected: 5|3|2|Claude - - local total_issues resolved_issues remaining_issues last_reviewer - IFS='|' read -r total_issues resolved_issues remaining_issues last_reviewer <<< "$result" - - [[ "$total_issues" == "5" ]] || { echo "Expected total_issues=5, got $total_issues"; return 1; } - [[ "$resolved_issues" == "3" ]] || { echo "Expected resolved_issues=3, got $resolved_issues"; return 1; } - [[ "$remaining_issues" == "2" ]] || { echo "Expected remaining_issues=2, got $remaining_issues"; return 1; } - [[ "$last_reviewer" == "Claude" ]] || { echo "Expected last_reviewer=Claude, got 
$last_reviewer"; return 1; } -} - -# ======================================== -# Test: State File Detection -# ======================================== - -test_state_file_detection_active() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create active state - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "current_round: 0" > "$session_dir/state.md" - - local result - result=$(monitor_find_state_file "$session_dir") - - # Should return state.md with active status - echo "$result" | grep -q "state.md|active" || { echo "Expected active state, got $result"; return 1; } -} - -test_state_file_detection_approve() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create approve state (no state.md, only approve-state.md) - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "approved" > "$session_dir/approve-state.md" - - local result - result=$(monitor_find_state_file "$session_dir") - - # Should return approve-state.md with approve status - echo "$result" | grep -q "approve-state.md|approve" || { echo "Expected approve state, got $result"; return 1; } -} - -# ======================================== -# Test: Phase Detection - Cancelled -# ======================================== - -test_phase_detection_cancelled() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with cancel-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/cancel-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "cancelled" ]] || { echo "Expected cancelled, got $phase"; return 1; } -} - -test_phase_detection_maxiter() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake 
session dir with maxiter-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/maxiter-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "maxiter" ]] || { echo "Expected maxiter, got $phase"; return 1; } -} - -# ======================================== -# Test: Startup Case Detection -# ======================================== - -test_reviewer_status_case3_all_commented() { - # All bots have commented - should be case 3 - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 3 (all bots commented) - local test_passed=true - echo "$result" | jq -e '.case == 3' || test_passed=false - - $test_passed -} - -# ======================================== -# Test: update_pr_goal_tracker helper -# ======================================== - -test_update_pr_goal_tracker() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Goal Tracker - -## Total Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 1 -- Remaining: 1 - -## Issue Summary -EOF - - # Update with new bot results (JSON format: issues=new found, resolved=new resolved) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 3, "resolved": 2, "bot": "Codex"}' - - # Verify update - should add 3 found, 2 resolved (new totals: 5 found, 3 resolved, 2 remaining) - grep -q "Total Issues Found: 5" 
"$tracker_file" || { echo "Expected 5 total found"; return 1; } - grep -q "Total Issues Resolved: 3" "$tracker_file" || { echo "Expected 3 total resolved"; return 1; } - grep -q "Remaining: 2" "$tracker_file" || { echo "Expected 2 remaining"; return 1; } -} - -# ======================================== -# Test: Unpushed Commits Detection -# ======================================== - -test_unpushed_commits_detected() { - # Create a git repo with unpushed commits - local test_dir="$TEST_TEMP_DIR" - cd "$test_dir" - - # Initialize git repo and create a commit - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - echo "# Test" > README.md - git add README.md - git commit -q -m "Initial commit" - - # Create a fake remote tracking branch (simulates having unpushed commits) - # This creates a local branch that pretends to track origin/main - git branch --set-upstream-to=HEAD 2>/dev/null || true - - # Add another commit (this will be "unpushed") - echo "new content" >> README.md - git add README.md - git commit -q -m "New commit" - - # Check git status for unpushed detection pattern - local ahead_count=$(git status -sb 2>/dev/null | grep -oE '\[ahead [0-9]+\]' | grep -oE '[0-9]+' || echo "0") - - # Test passes if we can detect we have local commits - # Note: In this test setup, we can't truly simulate upstream, so we verify the pattern matching works - [[ -n "$(git log --oneline -1)" ]] || return 1 -} - -# ======================================== -# Test: Force Push Detection Logic -# ======================================== - -test_force_push_ancestry_check() { - # Test git merge-base --is-ancestor behavior - local test_dir="$TEST_TEMP_DIR" - cd "$test_dir" - - # Create a git repo with two branches - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - - # Create initial commit - echo "v1" > file.txt - git add file.txt - git commit -q -m "Initial" - local INITIAL_SHA=$(git rev-parse HEAD) - - # 
Create second commit - echo "v2" >> file.txt - git add file.txt - git commit -q -m "Second" - local SECOND_SHA=$(git rev-parse HEAD) - - # Test: INITIAL_SHA should be ancestor of SECOND_SHA - git merge-base --is-ancestor "$INITIAL_SHA" "$SECOND_SHA" || { echo "Expected $INITIAL_SHA to be ancestor of $SECOND_SHA"; return 1; } - - # Test: SECOND_SHA should NOT be ancestor of INITIAL_SHA - if git merge-base --is-ancestor "$SECOND_SHA" "$INITIAL_SHA" 2>/dev/null; then - echo "Expected $SECOND_SHA to NOT be ancestor of $INITIAL_SHA" - return 1 - fi - - return 0 -} - -# ======================================== -# Test: Approve State Creation -# ======================================== - -test_approve_state_detection() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create session dir with approve-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "approved" > "$session_dir/approve-state.md" - - # Phase should be "approved" - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "approved" ]] || { echo "Expected phase=approved, got $phase"; return 1; } - - # State file detection should also work - local state_info - state_info=$(monitor_find_state_file "$session_dir") - - echo "$state_info" | grep -q "approve" || { echo "Expected approve in state_info, got $state_info"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Schema -# ======================================== - -test_goal_tracker_schema() { - # Read the goal tracker init template - local template_file="$PROJECT_ROOT/prompt-template/pr-loop/goal-tracker-initial.md" - - # Verify required sections exist per plan - grep -q "## Issue Summary" "$template_file" || { echo "Missing Issue Summary section"; return 1; } - grep -q "## Total Statistics" "$template_file" || { echo "Missing Total Statistics section"; return 1; } - grep -q "## Issue Log" 
"$template_file" || { echo "Missing Issue Log section"; return 1; } - - # Verify Total Statistics has required fields - grep -q "Total Issues Found:" "$template_file" || { echo "Missing Total Issues Found field"; return 1; } - grep -q "Total Issues Resolved:" "$template_file" || { echo "Missing Total Issues Resolved field"; return 1; } - grep -q "Remaining:" "$template_file" || { echo "Missing Remaining field"; return 1; } -} - -# ======================================== -# Test: Dynamic Startup Case -# ======================================== - -test_startup_case_4_5_detection() { - # Test that check-pr-reviewer-status.sh detects case 4/5 (commits after reviews) - # Set up fixtures: both bots commented, but there's a newer commit - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T10:15:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - # Note: The mock would need to simulate a newer commit timestamp - # For this test, we verify the script returns valid JSON - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return valid JSON with case field - echo "$result" | jq -e '.case' >/dev/null || { echo "Invalid JSON or missing case field"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Update with Issue Summary Row -# ======================================== - -test_goal_tracker_update_adds_row() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file with proper schema - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues 
Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update with new bot results - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Verify Issue Log has Round 1 entry - grep -q "### Round 1" "$tracker_file" || { echo "Missing Round 1 in Issue Log"; return 1; } - - # Verify totals updated - grep -q "Total Issues Found: 2" "$tracker_file" || { echo "Expected 2 total found"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Update Idempotency -# ======================================== - -test_goal_tracker_update_idempotent() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file with proper schema - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # First update - should succeed - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 3, "resolved": 0, "bot": "Codex"}' - - # Verify first update worked - grep -q "Total Issues Found: 3" "$tracker_file" || { echo "First update failed - expected 3 total found"; return 1; } - - # Second update with SAME round AND SAME bot - should be SKIPPED (idempotent) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 5, "resolved": 0, "bot": "Codex"}' - - # Totals should still be 3 (not 8) because round 1 was already recorded - grep -q "Total Issues Found: 3" 
"$tracker_file" || { echo "Idempotency failed - totals changed on duplicate update"; return 1; } - - # Count Issue Summary rows - should only have 2 (Round 0 + Round 1) - local row_count=$(grep -cE '^\|[[:space:]]*[0-9]+[[:space:]]*\|' "$tracker_file") - [[ "$row_count" -eq 2 ]] || { echo "Idempotency failed - expected 2 rows, got $row_count"; return 1; } -} - -# ======================================== -# Test: Shared Monitor - Find Latest Session -# ======================================== - -test_shared_monitor_find_latest_session() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create session directories with different timestamps - local loop_dir="$TEST_TEMP_DIR/.humanize/pr-loop" - mkdir -p "$loop_dir/2026-01-18_10-00-00" - mkdir -p "$loop_dir/2026-01-18_12-00-00" - mkdir -p "$loop_dir/2026-01-18_11-00-00" - - # Test that the latest session is found - local result - result=$(monitor_find_latest_session "$loop_dir") - - [[ "$(basename "$result")" == "2026-01-18_12-00-00" ]] || { - echo "Expected 2026-01-18_12-00-00, got $(basename "$result")" - return 1 - } -} - -# ======================================== -# Test: Shared Monitor - Find State File -# ======================================== - -test_shared_monitor_find_state_file() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - local session_dir="$TEST_TEMP_DIR/session" - mkdir -p "$session_dir" - - # Test 1: active state - touch "$session_dir/state.md" - local result - result=$(monitor_find_state_file "$session_dir") - local status="${result#*|}" - [[ "$status" == "active" ]] || { echo "Expected active, got $status"; return 1; } - - # Test 2: approve state (remove state.md, add approve-state.md) - rm "$session_dir/state.md" - touch "$session_dir/approve-state.md" - result=$(monitor_find_state_file "$session_dir") - status="${result#*|}" - [[ "$status" == "approve" ]] || { echo "Expected approve, got $status"; return 1; } - - 
# Test 3: no state file - rm "$session_dir/approve-state.md" - result=$(monitor_find_state_file "$session_dir") - status="${result#*|}" - [[ "$status" == "unknown" ]] || { echo "Expected unknown, got $status"; return 1; } -} - -# ======================================== -# Test: Shared Monitor - Get File Size -# ======================================== - -test_shared_monitor_get_file_size() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a test file with known content - local test_file="$TEST_TEMP_DIR/test-file.txt" - echo "Hello World" > "$test_file" - - local result - result=$(monitor_get_file_size "$test_file") - - # File should have content (size > 0) - [[ "$result" -gt 0 ]] || { echo "Expected size > 0, got $result"; return 1; } - - # Test non-existent file returns 0 - result=$(monitor_get_file_size "$TEST_TEMP_DIR/nonexistent.txt") - [[ "$result" -eq 0 ]] || { echo "Expected 0 for nonexistent file, got $result"; return 1; } -} - -# ======================================== -# Test: Phase Detection - Codex Analyzing (File Growth) -# ======================================== - -test_phase_detection_codex_analyzing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - local session_dir="$TEST_TEMP_DIR/session" - mkdir -p "$session_dir" - - # Create state.md for active session - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 ---- -EOF - - # Create a pr-check file with recent mtime (simulates Codex writing) - local check_file="$session_dir/round-1-pr-check.md" - echo "Analyzing PR..." 
> "$check_file" - # Touch with current time ensures mtime is within 10 seconds - touch "$check_file" - - # Test phase detection shows codex_analyzing - local result - result=$(get_pr_loop_phase "$session_dir") - [[ "$result" == "codex_analyzing" ]] || { - echo "Expected codex_analyzing, got $result" - return 1 - } - - # For the second test: make the file old and ensure cache shows no growth - # Touch with past timestamp - touch -d "2026-01-18 10:00:00" "$check_file" - - # Get the current file size and write it to cache twice - # (so second call sees no growth) - local size - size=$(stat -c%s "$check_file" 2>/dev/null || stat -f%z "$check_file" 2>/dev/null || echo 0) - local session_name=$(basename "$session_dir") - local cache_file="/tmp/humanize-phase-${session_name}-1.size" - echo "$size" > "$cache_file" - - # Now call again - same size, old mtime -> should be waiting_reviewer - result=$(get_pr_loop_phase "$session_dir") - [[ "$result" == "waiting_reviewer" ]] || { - echo "Expected waiting_reviewer after old mtime and no growth, got $result" - return 1 - } - - # Cleanup - rm -f "$cache_file" 2>/dev/null || true -} - -# ======================================== -# Test: Monitor Phase Display Output Assertions -# ======================================== - -# Helper: Run monitor with --once and capture output -run_monitor_once_capture_output() { - local session_dir="$1" - local project_dir="$2" - - # Create wrapper script that runs monitor and captures output - local wrapper="$project_dir/run_monitor_test.sh" - cat > "$wrapper" << 'WRAPPER_EOF' -#!/usr/bin/env bash -PROJECT_DIR="$1" -PROJECT_ROOT="$2" - -cd "$PROJECT_DIR" - -# Stub terminal commands for non-interactive mode -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} -export -f tput -clear() { :; } -export -f clear - -# Disable ANSI colors for easier parsing -export NO_COLOR=1 - -# Source humanize.sh -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Run monitor with --once 
flag -humanize monitor pr --once 2>&1 -WRAPPER_EOF - chmod +x "$wrapper" - - # Run and capture output - timeout 10 bash "$wrapper" "$project_dir" "$PROJECT_ROOT" 2>&1 || true -} - -# Test: Monitor displays "All reviews approved" for approved state -test_monitor_output_phase_approved() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_approved" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create approve-state.md (final approved state) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/approve-state.md" << 'EOF' ---- -current_round: 1 -startup_case: 3 -pr_number: 123 -configured_bots: - - codex -active_bots: ---- -EOF - - # Create goal-tracker.md (required by monitor) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains approved phase (require Phase: label) - if echo "$output" | grep -qi "Phase:.*approved\|Phase:.*All reviews"; then - return 0 - else - echo "Expected 'All reviews approved' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Waiting for initial PR review" for waiting_initial_review state -test_monitor_output_phase_waiting_initial() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_waiting" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create state.md with startup_case=1, round=0 (waiting for initial review) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/state.md" << 'EOF' ---- -current_round: 0 -startup_case: 1 -pr_number: 123 -configured_bots: - - codex - - claude -active_bots: - - codex - - claude ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue 
Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains waiting phase (require Phase: label) - # For startup_case=1 (no comments yet), the loop is waiting for initial review - if echo "$output" | grep -qi "Phase:.*waiting"; then - return 0 - else - echo "Expected 'Phase:...waiting' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Loop cancelled" for cancelled state -test_monitor_output_phase_cancelled() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_cancelled" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create cancel-state.md (cancelled state) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/cancel-state.md" << 'EOF' ---- -current_round: 1 -startup_case: 3 -pr_number: 123 -configured_bots: - - codex -active_bots: - - codex -cancelled_at: 2026-01-18T12:00:00Z ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains cancel phase (require Phase: label) - if echo "$output" | grep -qi "Phase:.*cancel"; then - return 0 - else - echo "Expected 'Phase:...cancel' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Codex analyzing..." 
for codex_analyzing phase -test_monitor_output_phase_codex_analyzing() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_analyzing" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create state.md for active session - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 -pr_number: 123 -configured_bots: - - codex -active_bots: - - codex ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - # Create a pr-check file with current mtime (simulates Codex actively writing) - local check_file="$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/round-1-pr-check.md" - echo "Analyzing PR..." > "$check_file" - # Touch with current time ensures mtime is within 10 seconds - touch "$check_file" - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains "Codex analyzing" phase (require Phase: prefix) - if echo "$output" | grep -qi "Phase:.*Codex.*analyz"; then - return 0 - else - echo "Expected 'Phase:...Codex analyzing' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# ======================================== -# Test: Case 1 Exception - No Trigger Required -# ======================================== - -test_case1_exception_no_trigger() { - # For startup_case 1/2/3 in round 0, no trigger is required - # This tests the logic that determines REQUIRE_TRIGGER - - # Test startup_case 1, round 0 -> REQUIRE_TRIGGER=false - local round=0 - local startup_case=1 - local require_trigger=false - - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ 
"$require_trigger" == "false" ]] || { echo "Case 1 should not require trigger"; return 1; } - - # Test startup_case 2, round 0 -> REQUIRE_TRIGGER=false - startup_case=2 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "false" ]] || { echo "Case 2 should not require trigger"; return 1; } - - # Test startup_case 4, round 0 -> REQUIRE_TRIGGER=true - startup_case=4 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "true" ]] || { echo "Case 4 should require trigger"; return 1; } - - # Test round 1 (any case) -> REQUIRE_TRIGGER=true - round=1 - startup_case=1 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "true" ]] || { echo "Round 1 should require trigger"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Row Inside Table -# ======================================== - -test_goal_tracker_row_inside_table() { - # Verify that update_pr_goal_tracker inserts rows INSIDE the Issue Summary table - # Not before "## Total Statistics" - - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - local tracker_file="$TEST_TEMP_DIR/goal-tracker-table.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 
0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update with round 1 - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Verify: The new row should be BEFORE the blank line that ends the table - # Check that there's a table row with Round 1 BEFORE "## Total Statistics" - - # Extract just the Issue Summary section - local summary_section - summary_section=$(sed -n '/^## Issue Summary/,/^## Total Statistics/p' "$tracker_file") - - # The section should contain | 1 | somewhere (Round 1 row) - echo "$summary_section" | grep -qE '^\|[[:space:]]*1[[:space:]]*\|' || { - echo "Round 1 row not found in Issue Summary table" - echo "Content:" - cat "$tracker_file" - return 1 - } - - # Verify the row appears BEFORE "## Total Statistics" (already ensured by sed range) - # and the table structure is valid (rows end before blank line before ## Total Statistics) - - # Count table rows in Issue Summary (should be 3: header, separator, round 0, round 1) - local row_count - row_count=$(echo "$summary_section" | grep -cE '^\|' || echo 0) - [[ "$row_count" -ge 4 ]] || { - echo "Expected at least 4 table rows (header + separator + 2 data rows), got $row_count" - return 1 - } -} - -# ======================================== -# Test: Goal Tracker Partial Update Repair -# ======================================== - -test_goal_tracker_partial_update_repair() { - # Verify that update_pr_goal_tracker repairs partial updates - # (when only summary OR log exists, not both) - - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Test 1: Tracker with summary row but NO log entry - local tracker_file="$TEST_TEMP_DIR/goal-tracker-partial1.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | -| 1 | Codex | 2 | 0 | Issues Found | - -## Total 
Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 0 -- Remaining: 2 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update - should add log entry but not summary row (since summary exists) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Should now have Round 1 in Issue Log - grep -q "### Round 1" "$tracker_file" || { echo "Log entry for Round 1 not added"; return 1; } - - # Test 2: Tracker with log entry but NO summary row - local tracker_file2="$TEST_TEMP_DIR/goal-tracker-partial2.md" - cat > "$tracker_file2" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -### Round 1 -Codex: Found 2 issues, Resolved 0 -EOF - - # Update - should add summary row but not log entry (since log exists) - update_pr_goal_tracker "$tracker_file2" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Should now have Round 1 in summary table - grep -qE '^\|[[:space:]]*1[[:space:]]*\|' "$tracker_file2" || { echo "Summary row for Round 1 not added"; return 1; } -} - -# ======================================== -# Test: Case 4 Emission (all commented + new commits) -# ======================================== - -test_case4_all_commented_new_commits() { - # Verify Case 4 is emitted when ALL reviewers commented and new commits after - - # Fixture: All bots commented at 10:00, latest commit at 11:00 - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo 
'[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T10:05:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - # Mock commit at 11:00 (after reviews) - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T11:00:00Z" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return Case 4 (all commented, new commits) - local case_num - case_num=$(echo "$result" | jq -r '.case') - [[ "$case_num" == "4" ]] || { echo "Expected Case 4, got $case_num"; return 1; } - - # has_commits_after_reviews should be true - local has_commits - has_commits=$(echo "$result" | jq -r '.has_commits_after_reviews') - [[ "$has_commits" == "true" ]] || { echo "Expected has_commits_after_reviews=true, got $has_commits"; return 1; } - - # Cleanup mock - unset MOCK_GH_LATEST_COMMIT_AT -} - -# ======================================== -# Test: Case 5 Emission (partial + new commits) -# ======================================== - -test_case5_partial_commented_new_commits() { - # Verify Case 5 is emitted when SOME reviewers commented and new commits after - - # Fixture: Only claude commented at 10:00, codex missing - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" # No codex - - # Mock commit at 11:00 (after claude's review) - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T11:00:00Z" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return Case 5 (partial commented, new commits) - local case_num - case_num=$(echo "$result" | jq -r '.case') - [[ "$case_num" == "5" ]] || { echo "Expected Case 5, got $case_num"; return 1; } - - # has_commits_after_reviews should be true - local has_commits - 
has_commits=$(echo "$result" | jq -r '.has_commits_after_reviews') - [[ "$has_commits" == "true" ]] || { echo "Expected has_commits_after_reviews=true, got $has_commits"; return 1; } - - # Cleanup mock - unset MOCK_GH_LATEST_COMMIT_AT - - # Restore original fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" -} - -# ======================================== -# Test: Setup Case 4/5 Failure Path (missing trigger_comment_id) -# ======================================== - -test_setup_case45_missing_trigger_comment_id() { - # Test that setup-pr-loop.sh fails when trigger_comment_id cannot be retrieved - # for Case 4/5 with --claude option - # This tests the fix that requires eyes verification - - # Set up fixtures for Case 4: All bots commented, new commits after reviews - # Only claude for simplicity - fixture needs bot comment BEFORE latest commit - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T08:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" - - # Set latest commit AFTER bot comments to trigger Case 4 - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T12:00:00Z" - export MOCK_GH_PR_NUMBER=123 - export MOCK_GH_PR_STATE="OPEN" - # Make the regular mock return null for the comment lookup that gets the trigger ID - export MOCK_GH_COMMENT_ID_LOOKUP_FAIL=true - - # Run setup-pr-loop.sh with --claude - should fail due to missing trigger_comment_id - local result exit_code - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --claude 2>&1) && exit_code=0 || exit_code=$? 
- - # Clean up mock env vars - unset MOCK_GH_LATEST_COMMIT_AT MOCK_GH_COMMENT_ID_LOOKUP_FAIL - - # Verify it failed - if [[ $exit_code -eq 0 ]]; then - echo "Expected setup to fail but it succeeded" - echo "Output (last 30 lines): $(echo "$result" | tail -30)" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Verify error message about missing trigger comment ID - if ! echo "$result" | grep -q "Could not find trigger comment ID"; then - echo "Expected error message about missing trigger_comment_id" - echo "Got: $result" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Verify loop directory was cleaned up - if ls .humanize/pr-loop/*/state.md 2>/dev/null | head -1 | grep -q .; then - echo "Loop directory was not cleaned up on failure" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Restore fixtures - echo 
'[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - return 0 -} - -# ======================================== -# Test: Goal Tracker Creation/Update Integration Test -# ======================================== - -test_goal_tracker_creation_integration() { - # Test that setup-pr-loop.sh creates goal-tracker.md - # This verifies: goal tracker is created at setup - - # Set up fixtures for Case 1: No comments yet (simplest setup) - echo '[]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[]' > "$FIXTURES_DIR/reactions.json" - - export MOCK_GH_PR_NUMBER=999 - export MOCK_GH_PR_STATE="OPEN" - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T10:00:00Z" - export MOCK_GH_HEAD_SHA="abc123xyz" - - # Clean up any existing pr-loop directories - rm -rf .humanize/pr-loop 2>/dev/null || true - - # Run setup-pr-loop.sh with --codex - local result exit_code - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --codex 2>&1) && exit_code=0 || exit_code=$? 
- - # Clean up mock env vars - unset MOCK_GH_PR_NUMBER MOCK_GH_PR_STATE MOCK_GH_LATEST_COMMIT_AT MOCK_GH_HEAD_SHA - - # Find the created loop directory - local loop_dir - loop_dir=$(ls -d .humanize/pr-loop/*/ 2>/dev/null | head -1) - - if [[ -z "$loop_dir" ]]; then - echo "No loop directory created by setup-pr-loop.sh" - echo "Output: $(echo "$result" | tail -20)" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md was created - if [[ ! -f "${loop_dir}goal-tracker.md" ]]; then - echo "goal-tracker.md not found in $loop_dir" - echo "Files in loop dir: $(ls -la "$loop_dir" 2>/dev/null)" - # Clean up - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md has expected structure (Issue Summary table) - if ! 
grep -q "Issue Summary" "${loop_dir}goal-tracker.md"; then - echo "goal-tracker.md missing 'Issue Summary' section" - echo "Contents: $(cat "${loop_dir}goal-tracker.md")" - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md has PR number from mock - if ! grep -q "999" "${loop_dir}goal-tracker.md"; then - echo "goal-tracker.md missing PR number 999" - echo "Contents: $(cat "${loop_dir}goal-tracker.md")" - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Clean up - rm -rf .humanize/pr-loop - - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo 
'[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - - return 0 -} - -# Test: Stop hook updates goal tracker with round results -test_stophook_updates_goal_tracker() { - # This test verifies that running the stop hook after bot review updates the goal tracker - local test_dir="$TEST_TEMP_DIR/stophook_goal_test" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps - local trigger_ts commit_ts comment_ts - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - comment_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - - # Create state.md for Round 0 - cat > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - # Create initial goal tracker (need blank line after table header for row insertion) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << 'EOF' -# PR Review Goal Tracker (PR #123) - -## Issue Summary - -| Round | Bot | Issues Found | Issues Resolved | Status | -|-------|-----|--------------|-----------------|--------| - -## Total Statistics -- Total Issues Found: 0 -- Total Issues Resolved: 0 -EOF - - # Create round-0 resolve file - echo "# Resolution" > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - # Create mock gh and git - local mock_bin="$test_dir/bin" 
- mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << MOCK_GH -#!/usr/bin/env bash -COMMENT_TS="$comment_ts" -COMMIT_TS="$commit_ts" - -case "\$1" in - repo) - if [[ "\$*" == *"--json owner"* ]]; then - echo "testowner" - exit 0 - fi - if [[ "\$*" == *"--json name"* ]]; then - echo "testrepo" - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - # Return codex comment with issues - echo "[{\"id\":1001,\"user\":{\"login\":\"chatgpt-codex-connector[bot]\",\"type\":\"Bot\"},\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found 2 issues: fix X, fix Y\"}]" - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - echo '[]' - exit 0 - ;; - pr) - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"headRefOid"* ]]; then - echo "{\"sha\":\"abc123\",\"date\":\"\$COMMIT_TS\"}" - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - # When --jq is used, return just the extracted timestamp - echo "\$COMMIT_TS" - exit 0 - fi - if [[ "\$*" == *"commits"* ]]; then - echo "{\"commits\":[{\"committedDate\":\"\$COMMIT_TS\"}]}" - exit 0 - fi - # PR lookup with number and url: gh pr view --json number,url -q '.number,.url' - if [[ "\$*" == *"number,url"* ]]; then - echo '123' - echo 'https://github.com/testowner/testrepo/pull/123' - exit 0 - fi - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '123' - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - echo "" - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Mock codex command - returns ISSUES_REMAINING to trigger goal tracker update - cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/usr/bin/env bash -# Mock codex for testing - output review analysis -cat << 'CODEX_OUTPUT' -## Bot Review Analysis - -### codex (chatgpt-codex-connector[bot]) -**Status**: ISSUES -**Issues Found**: 1 -- Fix issue X - -### Issues Found (if any) -- Fix issue X - -### Approved Bots (to remove from active_bots) -(none) - -### Final Recommendation -ISSUES_REMAINING -CODEX_OUTPUT -exit 0 -MOCK_CODEX - chmod +x "$mock_bin/codex" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_dir" - local old_path="$PATH" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(timeout 15 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" 2>&1) || true - - export PATH="$old_path" - unset CLAUDE_PROJECT_DIR - - # Verify goal tracker was updated with Round 1 row - local goal_file="$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" - if [[ ! -f "$goal_file" ]]; then - echo "Goal tracker file not found" - rm -rf "$test_dir" - return 1 - fi - - # Check that Round 1 row was added (format: | 1 | with possible spaces) - if ! 
grep -qE '^\|[[:space:]]*1[[:space:]]*\|' "$goal_file"; then - echo "Goal tracker not updated with Round 1" - echo "Contents: $(cat "$goal_file")" - echo "Hook output: $(echo "$hook_output" | tail -20)" - rm -rf "$test_dir" - return 1 - fi - - # Check that codex bot is mentioned in the row (lowercase to match configured bot names) - if ! grep -qi "codex" "$goal_file"; then - echo "Goal tracker missing codex bot entry" - echo "Contents: $(cat "$goal_file")" - rm -rf "$test_dir" - return 1 - fi - - rm -rf "$test_dir" - return 0 -} - -# ======================================== -# Main test runner -# ======================================== - -main() { - local test_filter="${1:-}" - - echo "==========================================" - echo " PR Loop System Tests" - echo "==========================================" - echo "" - echo "Project root: $PROJECT_ROOT" - echo "Mock directory: $MOCKS_DIR" - echo "Fixtures directory: $FIXTURES_DIR" - echo "" - - # Run tests - if [[ -z "$test_filter" || "$test_filter" == "mutual_exclusion" ]]; then - run_test "Mutual exclusion - RLCR blocks PR" test_mutual_exclusion_rlcr_blocks_pr - run_test "Mutual exclusion - PR blocks RLCR" test_mutual_exclusion_pr_blocks_rlcr - fi - - if [[ -z "$test_filter" || "$test_filter" == "reviewer_status" ]]; then - run_test "Reviewer status - Case 1 (no comments)" test_reviewer_status_case1_no_comments - run_test "Reviewer status - Case 2 (partial comments)" test_reviewer_status_case2_partial_comments - fi - - if [[ -z "$test_filter" || "$test_filter" == "reactions" ]]; then - run_test "Codex +1 detection" test_codex_thumbsup_detected - run_test "Codex +1 with --after filter" test_codex_thumbsup_with_after_filter - run_test "Claude eyes detection" test_claude_eyes_detected - fi - - if [[ -z "$test_filter" || "$test_filter" == "pr_reviews" ]]; then - run_test "PR reviews detection" test_reviewer_status_includes_pr_reviews - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase" ]]; then - 
run_test "Phase detection - approved" test_phase_detection_approved - run_test "Phase detection - waiting initial" test_phase_detection_waiting_initial - run_test "Phase detection - waiting reviewer" test_phase_detection_waiting_reviewer - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker" ]]; then - run_test "Goal tracker parsing" test_goal_tracker_parsing - fi - - if [[ -z "$test_filter" || "$test_filter" == "pr_goal_tracker" ]]; then - run_test "PR goal tracker parsing" test_pr_goal_tracker_parsing - run_test "update_pr_goal_tracker helper" test_update_pr_goal_tracker - fi - - if [[ -z "$test_filter" || "$test_filter" == "state_file" ]]; then - run_test "State file detection - active" test_state_file_detection_active - run_test "State file detection - approve" test_state_file_detection_approve - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase_extended" ]]; then - run_test "Phase detection - cancelled" test_phase_detection_cancelled - run_test "Phase detection - maxiter" test_phase_detection_maxiter - fi - - if [[ -z "$test_filter" || "$test_filter" == "reviewer_status_extended" ]]; then - run_test "Reviewer status - Case 3 (all commented)" test_reviewer_status_case3_all_commented - fi - - if [[ -z "$test_filter" || "$test_filter" == "unpushed" ]]; then - run_test "Unpushed commits detection" test_unpushed_commits_detected - fi - - if [[ -z "$test_filter" || "$test_filter" == "force_push" ]]; then - run_test "Force push ancestry check" test_force_push_ancestry_check - fi - - if [[ -z "$test_filter" || "$test_filter" == "approve_state" ]]; then - run_test "Approve state detection" test_approve_state_detection - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_schema" ]]; then - run_test "Goal tracker schema" test_goal_tracker_schema - run_test "Goal tracker update adds row" test_goal_tracker_update_adds_row - run_test "Goal tracker update idempotent" test_goal_tracker_update_idempotent - fi - - if [[ -z "$test_filter" || 
"$test_filter" == "startup_case" ]]; then - run_test "Startup case 4/5 detection" test_startup_case_4_5_detection - fi - - if [[ -z "$test_filter" || "$test_filter" == "shared_monitor" ]]; then - run_test "Shared monitor - find latest session" test_shared_monitor_find_latest_session - run_test "Shared monitor - find state file" test_shared_monitor_find_state_file - run_test "Shared monitor - get file size" test_shared_monitor_get_file_size - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase_analyzing" ]]; then - run_test "Phase detection - codex analyzing (file growth)" test_phase_detection_codex_analyzing - fi - - # Monitor output assertions for phase labels - if [[ -z "$test_filter" || "$test_filter" == "monitor_output" ]]; then - run_test "Monitor output - approved phase display" test_monitor_output_phase_approved - run_test "Monitor output - waiting initial phase display" test_monitor_output_phase_waiting_initial - run_test "Monitor output - cancelled phase display" test_monitor_output_phase_cancelled - run_test "Monitor output - codex analyzing phase display" test_monitor_output_phase_codex_analyzing - fi - - if [[ -z "$test_filter" || "$test_filter" == "case1_exception" ]]; then - run_test "Case 1 exception - no trigger required for startup_case 1" test_case1_exception_no_trigger - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_table" ]]; then - run_test "Goal tracker row inserted inside table" test_goal_tracker_row_inside_table - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_partial" ]]; then - run_test "Goal tracker partial update repair" test_goal_tracker_partial_update_repair - fi - - if [[ -z "$test_filter" || "$test_filter" == "case_4_5" ]]; then - run_test "Case 4 emission (all commented + new commits)" test_case4_all_commented_new_commits - run_test "Case 5 emission (partial + new commits)" test_case5_partial_commented_new_commits - fi - - if [[ -z "$test_filter" || "$test_filter" == "setup_failure" ]]; 
then - run_test "Setup Case 4/5 failure path (missing trigger_comment_id)" test_setup_case45_missing_trigger_comment_id - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_integration" ]]; then - run_test "Goal tracker creation via setup-pr-loop.sh" test_goal_tracker_creation_integration - run_test "Stop hook updates goal tracker with round results" test_stophook_updates_goal_tracker - fi - - echo "" - echo "==========================================" - echo " Results" - echo "==========================================" - echo "" - echo "Tests run: $TESTS_RUN" - echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}" - echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}" - echo "" - - if [[ $TESTS_FAILED -gt 0 ]]; then - exit 1 - fi -} - -main "$@" diff --git a/tests/test-pr-loop.sh b/tests/test-pr-loop.sh deleted file mode 100755 index 0bb615b8..00000000 --- a/tests/test-pr-loop.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# -# Tests for PR loop feature -# -# This is the main test runner that sources and executes all test modules: -# - test-pr-loop-scripts.sh: Script argument validation tests -# - test-pr-loop-hooks.sh: Hook functionality tests -# - test-pr-loop-stophook.sh: Stop hook tests -# -# Usage: ./test-pr-loop.sh -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -# Source test helpers and common library -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -# ======================================== -# Test Environment Setup -# ======================================== - -init_pr_loop_test_env - -# ======================================== -# Source Test Modules -# ======================================== - -source "$SCRIPT_DIR/test-pr-loop-scripts.sh" -source "$SCRIPT_DIR/test-pr-loop-hooks.sh" -source "$SCRIPT_DIR/test-pr-loop-stophook.sh" - -# ======================================== -# Run All Tests -# ======================================== - -# Script tests (setup, cancel, fetch, poll) -run_script_tests - -# Hook functionality tests -run_hook_tests - -# Stop hook tests -run_stophook_tests - -# ======================================== -# Print Summary -# ======================================== - -print_test_summary diff --git a/tests/test-unified-codex-config.sh b/tests/test-unified-codex-config.sh index 66a0eebe..5948193f 100755 --- a/tests/test-unified-codex-config.sh +++ b/tests/test-unified-codex-config.sh @@ -673,76 +673,6 @@ done echo "" -# ======================================== -# PR loop respects config-backed codex_model (AC-5) -# ======================================== - -echo "--- PR loop config-backed defaults ---" - -SETUP_PR_LOOP="$PROJECT_ROOT/scripts/setup-pr-loop.sh" -PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" - -if [[ ! -f "$LOOP_COMMON" ]]; then - skip "PR loop config tests require loop-common.sh" "file not found" -elif [[ ! 
-f "$SETUP_PR_LOOP" ]]; then - skip "PR loop config tests require setup-pr-loop.sh" "file not found" -else - # PR loop setup does NOT pre-set DEFAULT_CODEX_MODEL (should come from config) - assert_no_grep "setup-pr-loop.sh: does not pre-set DEFAULT_CODEX_MODEL" \ - 'DEFAULT_CODEX_MODEL=' "$SETUP_PR_LOOP" - - # PR loop setup DOES pre-set DEFAULT_CODEX_EFFORT to medium - assert_grep "setup-pr-loop.sh: pre-sets DEFAULT_CODEX_EFFORT to medium" \ - 'DEFAULT_CODEX_EFFORT="medium"' "$SETUP_PR_LOOP" - - # PR stop hook also does NOT pre-set DEFAULT_CODEX_MODEL - if [[ ! -f "$PR_STOP_HOOK" ]]; then - skip "pr-loop-stop-hook.sh tests require pr-loop-stop-hook.sh" "file not found" - else - assert_no_grep "pr-loop-stop-hook.sh: does not pre-set DEFAULT_CODEX_MODEL" \ - 'DEFAULT_CODEX_MODEL=' "$PR_STOP_HOOK" - - assert_grep "pr-loop-stop-hook.sh: pre-sets DEFAULT_CODEX_EFFORT to medium" \ - 'DEFAULT_CODEX_EFFORT="medium"' "$PR_STOP_HOOK" - fi - - # Behavioral: sourcing loop-common.sh with PR loop effort pre-set picks up config model - setup_test_dir - PR_CFG_PROJECT="$TEST_DIR/pr-cfg-project" - mkdir -p "$PR_CFG_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_CFG_PROJECT/.humanize/config.json" - - result=$(bash -c " - export DEFAULT_CODEX_EFFORT='medium' - export CLAUDE_PROJECT_DIR='$PR_CFG_PROJECT' - export XDG_CONFIG_HOME='$TEST_DIR/no-user-config' - source '$LOOP_COMMON' 2>/dev/null - echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" - " 2>/dev/null || echo "ERROR") - - assert_eq "PR loop behavioral: config codex_model respected (o3-mini)" \ - "o3-mini" "$(echo "$result" | cut -d'|' -f1)" - - assert_eq "PR loop behavioral: pre-set effort kept over config (medium)" \ - "medium" "$(echo "$result" | cut -d'|' -f2)" - - # Without config, falls back to hardcoded default model but keeps medium effort - result=$(bash -c " - export DEFAULT_CODEX_EFFORT='medium' - export XDG_CONFIG_HOME='$TEST_DIR/no-user-config' - source '$LOOP_COMMON' 
2>/dev/null - echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" - " 2>/dev/null || echo "ERROR") - - assert_eq "PR loop behavioral: no config falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" - - assert_eq "PR loop behavioral: no config keeps medium effort" \ - "medium" "$(echo "$result" | cut -d'|' -f2)" -fi - -echo "" - # ======================================== # ask-codex respects config-backed defaults (AC-5) # ======================================== @@ -775,127 +705,6 @@ fi echo "" -# ======================================== -# PR loop --codex-model override (runtime behavioral) -# ======================================== - -echo "--- PR loop --codex-model override (runtime) ---" - -if [[ ! -f "$SETUP_PR_LOOP" ]]; then - skip "PR loop override test requires setup-pr-loop.sh" "file not found" -else - # Run setup-pr-loop.sh --help with project config to verify help text shows config-backed default - # --help exits before requiring gh/PR prerequisites, so no external deps needed - setup_test_dir - PR_OVERRIDE_PROJECT="$TEST_DIR/pr-override-project" - mkdir -p "$PR_OVERRIDE_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_OVERRIDE_PROJECT/.humanize/config.json" - - help_output=$(cd "$PR_OVERRIDE_PROJECT" && \ - CLAUDE_PROJECT_DIR="$PR_OVERRIDE_PROJECT" \ - XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ - timeout 10 bash "$SETUP_PR_LOOP" --help 2>&1) || true - - # Help text must mention config-backed default (not a hardcoded model name) - if echo "$help_output" | grep -q 'default from config'; then - pass "PR loop runtime: --help shows config-backed default" - else - fail "PR loop runtime: --help shows config-backed default" "contains 'default from config'" "$(echo "$help_output" | grep codex-model)" - fi - - # End-to-end: run setup-pr-loop.sh with mock gh/codex and --codex-model override - if ! 
command -v jq >/dev/null 2>&1; then - skip "PR loop e2e test requires jq" "jq not found" - else - setup_test_dir - PR_E2E_PROJECT="$TEST_DIR/pr-e2e-project" - init_test_git_repo "$PR_E2E_PROJECT" - mkdir -p "$PR_E2E_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_E2E_PROJECT/.humanize/config.json" - - # Create a local bare remote (setup-pr-loop.sh needs a git remote) - PR_BARE_REMOTE="$TEST_DIR/pr-remote.git" - git clone --bare "$PR_E2E_PROJECT" "$PR_BARE_REMOTE" -q 2>/dev/null - (cd "$PR_E2E_PROJECT" && git remote remove origin 2>/dev/null; git remote add origin "$PR_BARE_REMOTE") 2>/dev/null || true - - # Create mock gh that handles all setup-pr-loop.sh calls - PR_MOCK_BIN="$TEST_DIR/pr-mock-bin" - mkdir -p "$PR_MOCK_BIN" - cat > "$PR_MOCK_BIN/gh" << 'GH_MOCK_EOF' -#!/usr/bin/env bash -# Mock gh for setup-pr-loop.sh end-to-end test -ALL_ARGS="$*" -case "$1" in - auth) exit 0 ;; - repo) - if [[ "$ALL_ARGS" == *"owner,name"* ]]; then - echo "testowner/testrepo"; exit 0 - elif [[ "$ALL_ARGS" == *"parent"* ]]; then - echo "null/"; exit 0 - fi ;; - pr) - if [[ "$2" == "view" ]]; then - if [[ "$ALL_ARGS" == *"number,url"* ]]; then - printf '123\nhttps://github.com/testowner/testrepo/pull/123'; exit 0 - elif [[ "$ALL_ARGS" == *"state"* ]]; then - echo "OPEN"; exit 0 - elif [[ "$ALL_ARGS" == *"number"* ]]; then - echo "123"; exit 0 - elif [[ "$ALL_ARGS" == *"headRefOid"* ]]; then - echo '{"sha":"abc123","date":"2026-01-01T00:00:00Z"}'; exit 0 - fi - elif [[ "$2" == "comment" ]]; then - echo "https://github.com/testowner/testrepo/pull/123#comment-1"; exit 0 - fi ;; - api) - if [[ "$2" == "user" ]]; then - echo '{"login":"testuser"}'; exit 0 - elif [[ "$2" == *"/comments"* ]] || [[ "$2" == *"/reviews"* ]]; then - echo "[]"; exit 0 - fi - echo "[]"; exit 0 ;; -esac -echo "Mock gh: unhandled: $ALL_ARGS" >&2; exit 1 -GH_MOCK_EOF - chmod +x "$PR_MOCK_BIN/gh" - - # Create mock codex (not called during setup, but required by command -v 
check) - cat > "$PR_MOCK_BIN/codex" << 'CODEX_MOCK_EOF' -#!/usr/bin/env bash -exit 0 -CODEX_MOCK_EOF - chmod +x "$PR_MOCK_BIN/codex" - - # Run setup-pr-loop.sh with --codex-model override - pr_setup_exit=0 - pr_output=$(cd "$PR_E2E_PROJECT" && \ - CLAUDE_PROJECT_DIR="$PR_E2E_PROJECT" \ - XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ - PATH="$PR_MOCK_BIN:$PATH" \ - timeout 30 bash "$SETUP_PR_LOOP" --claude --codex-model override-model:xhigh 2>&1) || pr_setup_exit=$? - - assert_eq "PR loop e2e: setup-pr-loop.sh exited successfully" \ - "0" "$pr_setup_exit" - - # Find the generated PR loop state.md - PR_STATE_FILE=$(find "$PR_E2E_PROJECT/.humanize/pr-loop" -name "state.md" 2>/dev/null | head -1 || true) - if [[ -z "$PR_STATE_FILE" ]]; then - fail "PR loop e2e: state.md was created" "non-empty path" "empty" - else - pass "PR loop e2e: state.md was created" - - # Assert --codex-model override is stored in state, not config values - assert_eq "PR loop e2e: --codex-model set codex_model (override-model)" \ - "override-model" "$(grep '^codex_model:' "$PR_STATE_FILE" | sed 's/codex_model: *//')" - - assert_eq "PR loop e2e: --codex-model set codex_effort (xhigh)" \ - "xhigh" "$(grep '^codex_effort:' "$PR_STATE_FILE" | sed 's/codex_effort: *//')" - fi - fi -fi - -echo "" - # ======================================== # ask-codex runtime behavioral test # ======================================== From 3374acb61ac904f76cac9b30a9d52a5924d0fa04 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sun, 29 Mar 2026 09:13:24 -0700 Subject: [PATCH 40/97] fix minor display bug of monitor script --- scripts/humanize.sh | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/scripts/humanize.sh b/scripts/humanize.sh index 346c1802..9804bde5 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -882,6 +882,10 @@ _humanize_monitor_codex() { # Handle case when no log file exists for current session if [[ -z "$current_file" ]]; then + # Track 
terminal dimensions to detect resize (fallback for SIGWINCH) + local centered_last_cols=$(tput cols) + local centered_last_rows=$(tput lines) + # Render centered no-log message if status changed or not yet shown if [[ "$last_no_log_status" != "$current_loop_status" ]]; then if [[ "$current_loop_status" == "active" ]]; then @@ -903,11 +907,17 @@ _humanize_monitor_codex() { return 0 fi - # Handle terminal resize at a safe point + # Detect terminal resize via both SIGWINCH flag and actual dimension change local redraw_centered_msg=false - if [[ "$resize_needed" == "true" ]]; then + local cur_cols=$(tput cols) + local cur_rows=$(tput lines) + if [[ "$resize_needed" == "true" ]] || \ + [[ "$cur_cols" != "$centered_last_cols" ]] || \ + [[ "$cur_rows" != "$centered_last_rows" ]]; then resize_needed=false redraw_centered_msg=true + centered_last_cols="$cur_cols" + centered_last_rows="$cur_rows" # Check if terminal is too small if ! _check_terminal_size; then _display_terminal_too_small @@ -919,6 +929,8 @@ _humanize_monitor_codex() { [[ "$monitor_running" != "true" ]] && break # Terminal is now big enough, reinitialize _setup_terminal + centered_last_cols=$(tput cols) + centered_last_rows=$(tput lines) else _update_scroll_region fi @@ -1003,6 +1015,10 @@ _humanize_monitor_codex() { local log_lines=$(_get_log_area_height) tail -n "$log_lines" "$current_file" 2>/dev/null + # Track terminal dimensions to detect resize (fallback for SIGWINCH) + local follow_last_cols=$(tput cols) + local follow_last_rows=$(tput lines) + # Incremental monitoring loop while [[ "$monitor_running" == "true" ]]; do sleep 0.5 # Check more frequently for smoother output @@ -1014,9 +1030,15 @@ _humanize_monitor_codex() { return 0 fi - # Handle terminal resize at a safe point - if [[ "$resize_needed" == "true" ]]; then + # Detect terminal resize via both SIGWINCH flag and actual dimension change + local cur_cols=$(tput cols) + local cur_rows=$(tput lines) + if [[ "$resize_needed" == "true" ]] || \ + [[ 
"$cur_cols" != "$follow_last_cols" ]] || \ + [[ "$cur_rows" != "$follow_last_rows" ]]; then resize_needed=false + follow_last_cols="$cur_cols" + follow_last_rows="$cur_rows" # Check if terminal is too small if ! _check_terminal_size; then _display_terminal_too_small @@ -1028,6 +1050,8 @@ _humanize_monitor_codex() { [[ "$monitor_running" != "true" ]] && break # Terminal is now big enough, reinitialize _setup_terminal + follow_last_cols=$(tput cols) + follow_last_rows=$(tput lines) else _update_scroll_region fi From a1f3614c283ce8fba19de7104f3904bc91837ef2 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sun, 29 Mar 2026 10:07:23 -0700 Subject: [PATCH 41/97] Be more specific about what is a round --- commands/start-rlcr-loop.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/commands/start-rlcr-loop.md b/commands/start-rlcr-loop.md index 1b53806d..f24fb156 100644 --- a/commands/start-rlcr-loop.md +++ b/commands/start-rlcr-loop.md @@ -127,6 +127,15 @@ This command starts an iterative development loop where: 7. If code review finds issues (`[P0-9]` markers), you fix them and continue 8. When no issues are found, the loop ends with a Finalize Phase +## What Is a Round + +**One round = the agent believes the entire plan is finished.** A round boundary is when the agent writes a summary and attempts to exit, triggering Codex review. This is the fundamental semantic: + +- A round is NOT one task, one milestone, one stage, or one layer of the plan. +- If the plan has multiple stages or milestones, they are all completed within a single round before writing the round summary. +- Intermediate progress checks (e.g., verifying a stage before starting the next) should use manual `ask-codex` calls, not round boundaries. +- Only write `round-N-summary.md` and attempt to exit when you believe ALL tasks in the plan are done. 
+ ## Goal Tracker System This loop uses a **Goal Tracker** to prevent goal drift across iterations: From c045dff1c6b0a3a07eb738b0895951ad3baf6e15 Mon Sep 17 00:00:00 2001 From: gyy0592 Date: Sat, 4 Apr 2026 01:25:36 +0000 Subject: [PATCH 42/97] fix: normalize path slashes in PostToolUse hook to prevent session_id write failure When CLAUDE_PLUGIN_ROOT has a trailing slash, the command template produces double slashes (e.g. "humania//scripts/setup-rlcr-loop.sh"). The setup script normalizes its own path via cd+pwd (single slash), but tool_input.command preserves the original double-slash string. This causes the boundary-aware string match in loop-post-bash-hook.sh to always fail, so the .pending-session-id signal is never consumed and session_id is never written to state.md. With an empty session_id, find_active_loop() backward-compat logic matches any session, causing cross-instance stop hook hijacking when multiple Claude Code sessions run concurrently. Fix: normalize consecutive slashes with tr -s '/' before comparison. Fixes: https://github.com/humania-org/humanize/issues/67 Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- hooks/loop-post-bash-hook.sh | 10 ++++++++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 5895b6ed..781c5e4c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.1" + "version": "1.15.2" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 671561ce..93e1b789 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.1", + "version": "1.15.2", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index bf969241..7e46527b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.1** +**Current Version: 1.15.2** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index eeed7bde..4dee0e7f 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -67,6 +67,16 @@ if [[ -n "$COMMAND_SIGNATURE" ]]; then exit 0 fi + # Normalize consecutive slashes (e.g. "humania//scripts" -> "humania/scripts"). + # CLAUDE_PLUGIN_ROOT may have a trailing slash, producing double slashes when + # concatenated with "/scripts/..." in the command template. The setup script + # normalizes its own path via cd+pwd (removing double slashes), but the + # tool_input.command preserves the original string. Without normalization, + # the string comparison below always fails and session_id is never written. + # See: https://github.com/humania-org/humanize/issues/67 + HOOK_COMMAND=$(printf '%s' "$HOOK_COMMAND" | tr -s '/') + COMMAND_SIGNATURE=$(printf '%s' "$COMMAND_SIGNATURE" | tr -s '/') + # Boundary-aware match: command must be a valid setup invocation form. 
# Requires the script path to be followed by end-of-string or any POSIX # whitespace ([[:space:]]), preventing concatenated forms. From 707097c52da364381addd9b34d47ca3b183a7f63 Mon Sep 17 00:00:00 2001 From: gyy0592 Date: Sat, 4 Apr 2026 18:35:28 +0000 Subject: [PATCH 43/97] revert: remove version bump, keep only hook fix Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 781c5e4c..5895b6ed 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.2" + "version": "1.15.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 93e1b789..671561ce 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.2", + "version": "1.15.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 7e46527b..bf969241 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.2** +**Current Version: 1.15.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From 79714b73d50e696593b23fccf289974c450a5daa Mon Sep 17 00:00:00 2001 From: Ligeng Zhu Date: Fri, 10 Apr 2026 02:28:58 +0800 Subject: [PATCH 44/97] Add support for multiple comment formats in refine-plan Extends comment parsing to support three formats: - Classic: CMT:/ENDCMT (existing) - Short tag: <cmt></cmt> (new) - Long tag: <comment></comment> (new) All formats support inline and multi-line usage and can be mixed within the same file. Updated documentation and error messages to be format-agnostic. Co-Authored-By: Claude Sonnet 4 --- README.md | 2 +- commands/refine-plan.md | 43 +++-- docs/usage.md | 38 ++++- scripts/validate-refine-plan-io.sh | 261 ++++++++++++++++++++--------- 4 files changed, 246 insertions(+), 98 deletions(-) diff --git a/README.md b/README.md index bf969241..aba201ac 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. **Refine an annotated plan** before implementation when reviewers add `CMT:` ... `ENDCMT` comments: +2. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `<cmt>` ... `</cmt>`, or `<comment>` ... `</comment>`): ```bash /humanize:refine-plan --input docs/plan.md ``` diff --git a/commands/refine-plan.md b/commands/refine-plan.md index fc129016..0d97142f 100644 --- a/commands/refine-plan.md +++ b/commands/refine-plan.md @@ -34,7 +34,7 @@ The refined plan MUST reuse the existing `gen-plan` schema. Do not invent new to 1. **Execution Mode Setup**: Parse CLI arguments and derive output paths 2. **Load Project Config**: Resolve `alternative_plan_language` and mode defaults using `config-loader.sh` semantics 3. **IO Validation**: Run `validate-refine-plan-io.sh` -4. **Comment Extraction**: Scan the annotated plan and extract valid `CMT:` / `ENDCMT` blocks +4. **Comment Extraction**: Scan the annotated plan and extract valid comment blocks (`CMT:`/`ENDCMT`, `<cmt>`/`</cmt>`, `<comment>`/`</comment>`) 5.
**Comment Classification**: Classify each extracted comment for downstream handling 6. **Comment Processing**: Answer questions, apply requested plan edits, and perform targeted research 7. **Plan Refinement**: Produce the comment-free refined plan while preserving the `gen-plan` structure @@ -167,7 +167,7 @@ Handle exit codes exactly: - Exit code 0: Continue to Phase 2 - Exit code 1: Report `Input file not found` and stop - Exit code 2: Report `Input file is empty` and stop -- Exit code 3: Report `Input file has no CMT:/ENDCMT blocks` and stop +- Exit code 3: Report `Input file has no comment blocks` and stop - Exit code 4: Report `Input file is missing required gen-plan sections` and stop - Exit code 5: Report `Output directory does not exist or is not writable - please fix it` and stop - Exit code 6: Report `QA directory is not writable` and stop @@ -196,17 +196,32 @@ Track these states while scanning the validated input in document order: Extraction rules: -1. Recognize `CMT:` as the start marker and `ENDCMT` as the end marker. -2. Support both inline and multi-line blocks: +1. Support three comment formats: + - Classic: `CMT:` as start marker and `ENDCMT` as end marker + - Short tag: `<cmt>` as start marker and `</cmt>` as end marker + - Long tag: `<comment>` as start marker and `</comment>` as end marker +2. Support both inline and multi-line blocks for all formats: - Inline: `Text before CMT: comment text ENDCMT text after` + - Inline: `Text before <cmt>comment text</cmt> text after` + - Inline: `Text before <comment>comment text</comment> text after` - Multi-line: ```markdown CMT: comment text ENDCMT ``` -3. Ignore `CMT:` and `ENDCMT` sequences inside fenced code blocks. -4. Ignore `CMT:` and `ENDCMT` sequences inside HTML comments. + ```markdown + <cmt> + comment text + </cmt> + ``` + ```markdown + <comment> + comment text + </comment> + ``` +3. Ignore comment markers inside fenced code blocks. +4. Ignore comment markers inside HTML comments. 5.
Update `NEAREST_HEADING` whenever a Markdown heading is encountered outside fenced code and HTML comments. 6. Preserve surrounding non-comment text when removing inline comment blocks from the working plan text. 7. Assign raw comment IDs in document order as `CMT-1`, `CMT-2`, ... only for non-empty blocks. @@ -217,7 +232,7 @@ Extraction rules: For each non-empty comment block, capture: - `id` (`CMT-N`) -- `original_text` exactly as written between `CMT:` and `ENDCMT` +- `original_text` exactly as written between the comment markers - `normalized_text` with surrounding whitespace trimmed - `start_line`, `start_column` - `end_line`, `end_column` @@ -230,8 +245,8 @@ For each non-empty comment block, capture: These are fatal extraction errors: -1. Nested `CMT:` while already inside a comment block -2. `ENDCMT` encountered while not inside a comment block +1. Nested comment start marker while already inside a comment block +2. Comment end marker encountered while not inside a comment block or wrong end marker for the format 3. 
End of file reached while still inside a comment block Every fatal parse error MUST report: @@ -243,9 +258,9 @@ Every fatal parse error MUST report: Examples of acceptable messages: -- `Comment parse error: nested CMT block at line 48, column 3 near "## Acceptance Criteria" (context: "CMT: split AC-2...")` -- `Comment parse error: stray ENDCMT at line 109, column 1 near "## Task Breakdown" (context: "ENDCMT")` -- `Comment parse error: missing ENDCMT for block opened at line 72, column 5 near "## Dependencies and Sequence"` +- `Comment parse error: nested comment block at line 48, column 3 near "## Acceptance Criteria" (context: "split AC-2...")` +- `Comment parse error: stray comment end marker at line 109, column 1 near "## Task Breakdown" (context: "")` +- `Comment parse error: missing end marker for block opened at line 72, column 5 near "## Dependencies and Sequence"` ### Outputs from Phase 2 @@ -403,7 +418,7 @@ Optional sections that MUST be preserved when present in the input: ### Refinement Rules -1. Remove every resolved `CMT:` / `ENDCMT` tag and all enclosed comment text from the refined plan. +1. Remove every resolved comment marker and all enclosed comment text from the refined plan. 2. Do not add any new top-level schema section. 3. Preserve `AC-X` / `AC-X.Y` formatting. 4. Preserve task IDs unless a comment explicitly requests a structural change. @@ -429,7 +444,7 @@ Rules: Before generating the QA document, verify: 1. All required sections are still present -2. No `CMT:` or `ENDCMT` markers remain +2. No comment markers remain 3. Every referenced `AC-*` exists 4. Every task dependency references an existing task ID or `-` 5. Every task row has exactly one valid routing tag: `coding` or `analyze` diff --git a/docs/usage.md b/docs/usage.md index e12d45b9..0ef06046 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,7 +46,7 @@ The quiz is advisory, not a gate. You always have the option to proceed. 
But tha ```bash /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. If the plan is reviewed with `CMT:` ... `ENDCMT` annotations, refine it and generate a QA ledger: +2. If the plan is reviewed with comment annotations, refine it and generate a QA ledger: ```bash /humanize:refine-plan --input docs/plan.md ``` @@ -127,7 +127,7 @@ Workflow: 5. Generates a structured plan.md with acceptance criteria 6. Optionally starts `/humanize:start-rlcr-loop` if `--auto-start-rlcr-if-converged` conditions are met -If reviewers later annotate the generated plan with `CMT:` ... `ENDCMT` blocks, run +If reviewers later annotate the generated plan with comment blocks, run `/humanize:refine-plan --input ` before starting or resuming implementation. ### refine-plan @@ -169,9 +169,10 @@ how each comment was handled. **Annotated comment block format:** -`refine-plan` looks for reviewer comments wrapped in `CMT:` and `ENDCMT` markers. Both inline -and multi-line comment blocks are supported: +`refine-plan` supports three comment formats for reviewer annotations. Both inline +and multi-line comment blocks are supported in all formats: +**Classic format (CMT:/ENDCMT):** ```markdown Text before CMT: clarify why AC-3 is split here ENDCMT text after ``` @@ -183,11 +184,36 @@ If the dependency is unclear, add a pending decision instead of guessing. ENDCMT ``` +**Short tag format (<cmt></cmt>):** +```markdown +Text before <cmt>clarify why AC-3 is split here</cmt> text after +``` + +```markdown +<cmt> +Please investigate whether this task should depend on task4 or task5. +If the dependency is unclear, add a pending decision instead of guessing. +</cmt> +``` + +**Long tag format (<comment></comment>):** +```markdown +Text before <comment>clarify why AC-3 is split here</comment> text after +``` + +```markdown +<comment> +Please investigate whether this task should depend on task4 or task5. +If the dependency is unclear, add a pending decision instead of guessing. +</comment> +``` + Rules: -- At least one non-empty `CMT:` block must exist in the input file.
-- `CMT:` and `ENDCMT` markers inside fenced code blocks or HTML comments are ignored. +- At least one non-empty comment block must exist in the input file. +- Comment markers inside fenced code blocks or HTML comments are ignored. - Empty comment blocks are removed but do not create QA ledger entries. - The input plan must still follow the `gen-plan` section schema. +- All three formats can be mixed within the same file. **QA output structure:** diff --git a/scripts/validate-refine-plan-io.sh b/scripts/validate-refine-plan-io.sh index f0ca70c0..34649193 100755 --- a/scripts/validate-refine-plan-io.sh +++ b/scripts/validate-refine-plan-io.sh @@ -5,7 +5,7 @@ # 0 - Success, all validations passed # 1 - Input file does not exist # 2 - Input file is empty -# 3 - Input file has no valid CMT:/ENDCMT blocks or has malformed CMT syntax +# 3 - Input file has no valid comment blocks or has malformed comment syntax # 4 - Input file missing required gen-plan sections # 5 - Output directory does not exist or is not writable, or input directory is not writable for in-place mode # 6 - QA directory not writable @@ -51,14 +51,67 @@ scan_cmt_blocks() { heading = current_heading() if (kind == "nested") { - printf "Comment parse error: nested CMT block at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" + printf "Comment parse error: nested comment block at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" } else if (kind == "stray_end") { - printf "Comment parse error: stray ENDCMT at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" + printf "Comment parse error: stray comment end marker at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" } exit fatal_code } + function find_comment_markers(text, start_pos, markers, i, pos, min_pos, closest_marker, closest_pos) { + # 
Initialize markers array + markers["CMT:"] = "classic_start" + markers[""] = "cmt_tag_start" + markers[""] = "comment_tag_start" + markers["ENDCMT"] = "classic_end" + markers[""] = "cmt_tag_end" + markers[""] = "comment_tag_end" + markers[""] = "html_end" + + closest_marker = "" + closest_pos = 0 + min_pos = length(text) + 1 + + for (marker in markers) { + pos = index(substr(text, start_pos), marker) + if (pos > 0) { + pos = start_pos + pos - 1 + if (pos < min_pos) { + min_pos = pos + closest_marker = marker + closest_pos = pos + } + } + } + + if (closest_marker == "") { + return "" + } else { + return closest_marker ":" closest_pos + } + } + + function get_end_marker_for_format(format) { + if (format == "classic") return "ENDCMT" + if (format == "cmt_tag") return "" + if (format == "comment_tag") return "" + return "" + } + + function get_marker_length(marker) { + if (marker == "CMT:") return 4 + if (marker == "") return 5 + if (marker == "") return 9 + if (marker == "ENDCMT") return 6 + if (marker == "") return 6 + if (marker == "") return 10 + if (marker == "") return 3 + return 0 + } + BEGIN { count = 0 in_fence = 0 @@ -71,6 +124,7 @@ scan_cmt_blocks() { cmt_open_heading = "Preamble" cmt_open_excerpt = "" cmt_has_text = 0 + cmt_format = "" # Track format: "classic", "cmt_tag", "comment_tag" fatal = 0 fatal_code = 0 } @@ -126,26 +180,8 @@ scan_cmt_blocks() { } if (in_cmt) { - html_rel = index(rest, " --- +{{COMMIT_HISTORY_SECTION}} + ## Part 1: Goal Tracker Audit (MANDATORY) Read @{{GOAL_TRACKER_FILE}} and verify: diff --git a/prompt-template/codex/regular-review.md b/prompt-template/codex/regular-review.md index 7db26ea2..4d4a8680 100644 --- a/prompt-template/codex/regular-review.md +++ b/prompt-template/codex/regular-review.md @@ -17,6 +17,8 @@ Below is Claude's summary of the work completed: --- +{{COMMIT_HISTORY_SECTION}} + ## Part 1: Implementation Review - Your task is to conduct a deep critical review, focusing on finding implementation issues and 
identifying gaps between "plan-design" and actual implementation. diff --git a/tests/test-commit-history-section.sh b/tests/test-commit-history-section.sh new file mode 100755 index 00000000..7177f5e8 --- /dev/null +++ b/tests/test-commit-history-section.sh @@ -0,0 +1,271 @@ +#!/usr/bin/env bash +# +# Test script for the Integral (I) component: commit-history-section +# +# Validates: +# 1. Round 0: "(no commits yet)" and "(first round, no prior history)" +# 2. Round 2+: commit log and round file references rendered correctly +# 3. Corrupted BASE_COMMIT: graceful fallback with annotation +# 4. Template missing: fallback renders the full section including round files +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" +source "$PROJECT_ROOT/hooks/lib/template-loader.sh" + +TEMPLATE_DIR="$PROJECT_ROOT/prompt-template" + +echo "========================================" +echo "Testing commit-history-section (I component)" +echo "========================================" +echo "" + +# ======================================== +# Setup: create a temporary git repo +# ======================================== +setup_test_dir +init_test_git_repo "$TEST_DIR/repo" + +# ======================================== +# Test 1: Round 0 - no commits since base, first round +# ======================================== +echo "Test 1: Round 0 - no commits, first round" + +CURRENT_ROUND=0 +BASE_COMMIT=$(git -C "$TEST_DIR/repo" rev-parse HEAD) + +# No commits since BASE_COMMIT..HEAD (same commit) +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +LOOP_TIMESTAMP="2026-01-01_00-00-00" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- 
@.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +if echo "$RESULT" | grep -q "(no commits yet)" && echo "$RESULT" | grep -q "(first round, no prior history)"; then + pass "Round 0 shows correct placeholders" +else + fail "Round 0 placeholders" "(no commits yet) and (first round, no prior history)" "$RESULT" +fi + +# ======================================== +# Test 2: Round 3 - with commits and round history +# ======================================== +echo "" +echo "Test 2: Round 3 - commits and round file references" + +# Make some commits +cd "$TEST_DIR/repo" +echo "feat1" > feat1.txt && git add feat1.txt && git commit -q -m "feat: add feature 1" +echo "feat2" > feat2.txt && git add feat2.txt && git commit -q -m "feat: add feature 2" +echo "fix1" > fix1.txt && git add fix1.txt && git commit -q -m "fix: resolve bug in feature 1" +cd - > /dev/null + +CURRENT_ROUND=3 +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +HAS_COMMITS=true +HAS_ROUNDS=true + +echo "$RESULT" | 
grep -q "feat: add feature 1" || HAS_COMMITS=false +echo "$RESULT" | grep -q "feat: add feature 2" || HAS_COMMITS=false +echo "$RESULT" | grep -q "fix: resolve bug in feature 1" || HAS_COMMITS=false + +echo "$RESULT" | grep -q "round-2-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-1-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-0-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-2-review-result.md" || HAS_ROUNDS=false + +if [[ "$HAS_COMMITS" == "true" ]]; then + pass "Round 3 shows all 3 commits" +else + fail "Round 3 commits" "3 commit messages" "$RESULT" +fi + +if [[ "$HAS_ROUNDS" == "true" ]]; then + pass "Round 3 shows round 0-2 file references" +else + fail "Round 3 round files" "round-0/1/2 summary and review files" "$RESULT" +fi + +# ======================================== +# Test 3: Corrupted BASE_COMMIT - nonexistent object +# ======================================== +echo "" +echo "Test 3: Corrupted BASE_COMMIT graceful fallback" + +BAD_COMMIT="deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + +# Simulate the exact logic from the stop hook (merge-base --is-ancestor) +if [[ -n "$BAD_COMMIT" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$BAD_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BAD_COMMIT"..HEAD 2>/dev/null | tail -80) +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Corrupted BASE_COMMIT triggers annotation" +else + fail "Corrupted BASE_COMMIT annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +if echo "$COMMIT_HISTORY" | grep -q "feat: add feature"; then + pass "Corrupted BASE_COMMIT still 
shows recent commits" +else + fail "Corrupted BASE_COMMIT recent commits" "recent branch commits" "$COMMIT_HISTORY" +fi + +# Verify no crash (we got here = no set -e crash) +pass "Corrupted BASE_COMMIT did not crash (set -e safe)" + +# ======================================== +# Test 3b: Valid but unrelated commit (not ancestor of HEAD) +# ======================================== +echo "" +echo "Test 3b: Valid but unrelated BASE_COMMIT (orphan branch)" + +# Create an orphan branch with its own commit, then switch back +cd "$TEST_DIR/repo" +ORIG_BRANCH=$(git rev-parse --abbrev-ref HEAD) +git checkout -q --orphan orphan-test +echo "orphan" > orphan.txt && git add orphan.txt && git commit -q -m "orphan commit" +ORPHAN_COMMIT=$(git rev-parse HEAD) +git checkout -q "$ORIG_BRANCH" +cd - > /dev/null + +# ORPHAN_COMMIT exists but is NOT an ancestor of HEAD +if [[ -n "$ORPHAN_COMMIT" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$ORPHAN_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY="should not reach here" +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Unrelated valid commit triggers annotation" +else + fail "Unrelated valid commit annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +# ======================================== +# Test 4: Missing template - fallback renders full section +# ======================================== +echo "" +echo "Test 4: Missing template fallback renders full section" + +CURRENT_ROUND=2 +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; 
r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done + +# Use the exact fallback format from the stop hook +COMMIT_HISTORY_SECTION_FALLBACK="## Development History (Integral Context) +\`\`\` +${COMMIT_HISTORY} +\`\`\` +### Recent Round Files +Read these files before conducting your review to understand the trajectory of work: +${RECENT_ROUND_FILES}" + +# Point to a non-existent template to force fallback +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/non-existent-template.md" "$COMMIT_HISTORY_SECTION_FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +FALLBACK_OK=true +echo "$RESULT" | grep -q "Development History" || FALLBACK_OK=false +echo "$RESULT" | grep -q "feat: add feature 1" || FALLBACK_OK=false +echo "$RESULT" | grep -q "Recent Round Files" || FALLBACK_OK=false +echo "$RESULT" | grep -q "round-1-summary.md" || FALLBACK_OK=false +echo "$RESULT" | grep -q "round-0-review-result.md" || FALLBACK_OK=false +echo "$RESULT" | grep -q "Read these files" || FALLBACK_OK=false + +if [[ "$FALLBACK_OK" == "true" ]]; then + pass "Fallback renders full section with commits, round files, and directive" +else + fail "Fallback full section" "commits + round files + directive" "$RESULT" +fi + +# ======================================== +# Test 5: Round 1 - only 1 prior round (boundary) +# ======================================== +echo "" +echo "Test 5: Round 1 - only 1 prior round" + +CURRENT_ROUND=1 +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +if echo "$RECENT_ROUND_FILES" | grep -q "round-0-summary.md" && \ + ! 
echo "$RECENT_ROUND_FILES" | grep -q "round-1-"; then + pass "Round 1 references only round 0" +else + fail "Round 1 boundary" "only round-0 references" "$RECENT_ROUND_FILES" +fi + +# ======================================== +# Test 6: Empty BASE_COMMIT (legacy loop) +# ======================================== +echo "" +echo "Test 6: Empty BASE_COMMIT fallback" + +EMPTY_BASE="" +if [[ -n "$EMPTY_BASE" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$EMPTY_BASE" HEAD 2>/dev/null; then + COMMIT_HISTORY="should not reach here" +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Empty BASE_COMMIT triggers annotation" +else + fail "Empty BASE_COMMIT annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +# ======================================== +# Summary +# ======================================== +print_test_summary "Commit History Section (I Component) Tests" From fc8a02774e6e16b3cd0745948f93573ba8f81412 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Wed, 15 Apr 2026 11:00:31 -0700 Subject: [PATCH 51/97] Update repo org URLs from humania-org to PolyArch Bump version to 1.15.2. 
--- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 6 +++--- README.md | 4 ++-- docs/install-for-claude.md | 4 ++-- docs/install-for-codex.md | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 588e3b1d..8686b27c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -1,7 +1,7 @@ { "name": "humania", "owner": { - "name": "humania-org" + "name": "PolyArch" }, "plugins": [ { diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 0f9ab32b..fd77b933 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -3,10 +3,10 @@ "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", "version": "1.16.0", "author": { - "name": "humania-org" + "name": "PolyArch" }, - "repository": "https://github.com/humania-org/humanize", - "homepage": "https://github.com/humania-org/humanize#readme", + "repository": "https://github.com/PolyArch/humanize", + "homepage": "https://github.com/PolyArch/humanize#readme", "license": "MIT", "keywords": [ "iterative-development", diff --git a/README.md b/README.md index adae3a4a..3ddc24ed 100644 --- a/README.md +++ b/README.md @@ -30,9 +30,9 @@ The loop has two phases: **Implementation** (Claude works, Codex reviews summari ```bash # Add humania marketplace -/plugin marketplace add humania-org/humanize +/plugin marketplace add PolyArch/humanize # If you want to use development branch for experimental features -/plugin marketplace add humania-org/humanize#dev +/plugin marketplace add PolyArch/humanize#dev # Then install humanize plugin /plugin install humanize@humania ``` diff --git a/docs/install-for-claude.md b/docs/install-for-claude.md index 5af9b4ca..c190dd2e 100644 --- 
a/docs/install-for-claude.md +++ b/docs/install-for-claude.md @@ -12,7 +12,7 @@ Start Claude Code and run: ```bash # Add the marketplace -/plugin marketplace add git@github.com:humania-org/humanize.git +/plugin marketplace add git@github.com:PolyArch/humanize.git # Install the plugin /plugin install humanize@humania @@ -31,7 +31,7 @@ claude --plugin-dir /path/to/humanize The `dev` branch contains experimental features that are not yet released to `main`. To try them locally: ```bash -git clone https://github.com/humania-org/humanize.git +git clone https://github.com/PolyArch/humanize.git cd humanize git checkout dev ``` diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index 8a9aecd0..5d34b24c 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -7,7 +7,7 @@ This guide explains how to install Humanize for Codex CLI, including the skill r One-line install from anywhere: ```bash -tmp_dir="$(mktemp -d)" && git clone --depth 1 https://github.com/humania-org/humanize.git "$tmp_dir/humanize" && "$tmp_dir/humanize/scripts/install-skills-codex.sh" +tmp_dir="$(mktemp -d)" && git clone --depth 1 https://github.com/PolyArch/humanize.git "$tmp_dir/humanize" && "$tmp_dir/humanize/scripts/install-skills-codex.sh" ``` From the Humanize repo root: From 5c2f07ff34416007642f8af3bf89b53beca37b46 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Wed, 15 Apr 2026 11:01:01 -0700 Subject: [PATCH 52/97] Update remaining repo org URLs from humania-org to PolyArch --- hooks/loop-post-bash-hook.sh | 2 +- prompt-template/claude/methodology-analysis-prompt.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index fc0818ee..699d21bd 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -73,7 +73,7 @@ if [[ -n "$COMMAND_SIGNATURE" ]]; then # normalizes its own path via cd+pwd (removing double slashes), but the # tool_input.command preserves the original 
string. Without normalization, # the string comparison below always fails and session_id is never written. - # See: https://github.com/humania-org/humanize/issues/67 + # See: https://github.com/PolyArch/humanize/issues/67 HOOK_COMMAND=$(printf '%s' "$HOOK_COMMAND" | tr -s '/') COMMAND_SIGNATURE=$(printf '%s' "$COMMAND_SIGNATURE" | tr -s '/') diff --git a/prompt-template/claude/methodology-analysis-prompt.md b/prompt-template/claude/methodology-analysis-prompt.md index f8a7ec4b..085153e5 100644 --- a/prompt-template/claude/methodology-analysis-prompt.md +++ b/prompt-template/claude/methodology-analysis-prompt.md @@ -64,7 +64,7 @@ c) **If user declines**: Thank them, write completion marker to `{{LOOP_DIR}}/me d) **If user agrees**: - Draft a GitHub issue title and body from the analysis report - Show the draft via a second `AskUserQuestion` for the user to review and confirm - - If confirmed: run `gh issue create --repo humania-org/humanize --title "..." --body "..."` + - If confirmed: run `gh issue create --repo PolyArch/humanize --title "..." --body "..."` - If `gh` is not available, provide the title and body so the user can create the issue manually - Write completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit From 67fa1aea341c0b412c1fbd384d3ab78e7e26c73e Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Wed, 15 Apr 2026 13:12:00 -0700 Subject: [PATCH 53/97] Reset dev version to 1.16.0 --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index e72bd0aa..2e833ddc 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 1ed2e046..fd77b933 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1", + "version": "1.16.0", "author": { "name": "PolyArch" }, diff --git a/README.md b/README.md index 44bea807..da6d8305 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.1** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From a9078aaf4d432d28e014ae172aa863f825d6f777 Mon Sep 17 00:00:00 2001 From: jiaxiaoyu Date: Thu, 16 Apr 2026 13:39:23 +0800 Subject: [PATCH 54/97] fix: block tracked humanize loop state from git --- hooks/lib/loop-common.sh | 56 ++++++++++++++++++- hooks/loop-codex-stop-hook.sh | 15 +++++ prompt-template/block/git-not-clean.md | 4 +- prompt-template/block/git-tracked-humanize.md | 23 ++++++++ tests/test-humanize-escape.sh | 5 ++ tests/test-stop-gate.sh | 47 +++++++++++++--- 6 files changed, 139 insertions(+), 11 deletions(-) create mode 100644 prompt-template/block/git-tracked-humanize.md diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 2425449b..f6637a05 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1221,7 +1221,7 @@ git_adds_humanize() { # Check for direct .humanize reference (blocked regardless of other flags) # Handles: .humanize, ./.humanize, path/to/.humanize, ".humanize", '.humanize' # Pattern matches .humanize at start, after space, after / or ./ AND followed by end, /, or space - # This avoids over-blocking .humanizeconfig or .humanize-backup + # This avoids over-blocking .humanizeconfig or .humanize-backup. if echo "$add_args_normalized" | grep -qE '(^|[[:space:]]|/)\.humanize($|/|[[:space:]])'; then return 0 fi @@ -1319,6 +1319,60 @@ IMPORTANT: The commit message must NOT contain the literal string \".humanize\" load_and_render_safe "$TEMPLATE_DIR" "block/git-add-humanize.md" "$fallback" } +# Return success if local Humanize runtime state has entered git tracking or the index. +# Untracked .humanize state is allowed; tracked or staged state must be blocked. +# Usage: git_has_tracked_humanize_state [project_root] +git_has_tracked_humanize_state() { + local project_root="${1:-.}" + + if [[ ! -d "$project_root/.git" ]] && ! 
git -C "$project_root" rev-parse --git-dir >/dev/null 2>&1; then + return 1 + fi + + if git -C "$project_root" ls-files --error-unmatch .humanize >/dev/null 2>&1; then + return 0 + fi + if git -C "$project_root" ls-files '.humanize/*' '.humanize-*' | grep -q '.'; then + return 0 + fi + + if git -C "$project_root" diff --cached --name-only -- .humanize '.humanize-*' | grep -q '.'; then + return 0 + fi + + return 1 +} + +# Standard message for blocking tracked/staged .humanize state. +# Usage: git_tracked_humanize_blocked_message +git_tracked_humanize_blocked_message() { + local fallback="# Tracked Humanize State Blocked + +Detected tracked or staged files under \`.humanize/\` (or legacy \`.humanize-*\`). + +These files are local Humanize loop state and must remain outside version control. + +## Required Fix + +1. Remove Humanize state from the index, for example: + + git rm --cached -r .humanize + + If legacy tracked state exists, remove those entries too: + + git rm --cached -r .humanize-* + +2. Keep only real project files staged. +3. Retry the stop action after the local state is no longer tracked. + +## Important + +- Do NOT use \`git add -f\` on Humanize state files. +- Do NOT commit RLCR trackers, round summaries, contracts, or cancel/finalize markers." 
+ + load_and_render_safe "$TEMPLATE_DIR" "block/git-tracked-humanize.md" "$fallback" +} + # Standard message for blocking direct execution of hook scripts # Usage: stop_hook_direct_execution_blocked_message stop_hook_direct_execution_blocked_message() { diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index f9b261b1..570a4867 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -622,6 +622,21 @@ if [[ "$GIT_IS_REPO" == "true" ]]; then GIT_ISSUES="" SPECIAL_NOTES="" + if git_has_tracked_humanize_state "$PROJECT_ROOT"; then + cleanup_stale_index_lock + REASON=$(git_tracked_humanize_blocked_message) + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - tracked Humanize state detected, remove it from git first" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi + # Check for uncommitted changes (staged or unstaged) using cached status. # Exclude untracked .humanize/ paths and .humanize-* dash-separated legacy # variants from the dirty determination because local plugin state under diff --git a/prompt-template/block/git-not-clean.md b/prompt-template/block/git-not-clean.md index 300e7fb8..f7fdfaf5 100644 --- a/prompt-template/block/git-not-clean.md +++ b/prompt-template/block/git-not-clean.md @@ -5,10 +5,12 @@ You are trying to stop, but you have **{{GIT_ISSUES}}**. **Required Actions**: 0. If the `code-simplifier` plugin is installed, use it to review and simplify your code before committing. Invoke via: `/code-simplifier`, `@agent-code-simplifier`, or `@code-simplifier:code-simplifier (agent)` 1. Review untracked files - add build artifacts to `.gitignore` -2. Stage real changes: `git add ` (or `git add -A` if all files should be tracked) +2. Stage only real changes with specific paths: `git add ` 3. 
Commit with a descriptive message following project conventions **Important Rules**: +- Do NOT use `git add -A`, `git add --all`, or `git add .` during an active RLCR loop +- Never stage `.humanize/` or legacy `.humanize-*` loop artifacts - Commit message must follow project conventions - AI tools (Claude, Codex, etc.) must NOT have authorship in commits - Do NOT include `Co-Authored-By: Claude` or similar AI attribution diff --git a/prompt-template/block/git-tracked-humanize.md b/prompt-template/block/git-tracked-humanize.md new file mode 100644 index 00000000..afb53ecc --- /dev/null +++ b/prompt-template/block/git-tracked-humanize.md @@ -0,0 +1,23 @@ +# Tracked Humanize State Blocked + +Detected tracked or staged files under `.humanize/` or legacy `.humanize-*`. + +These files are local Humanize loop state and must remain outside version control. + +## Required Fix + +1. Remove Humanize state from the index, for example: + + git rm --cached -r .humanize + + If legacy tracked state exists, remove those entries too: + + git rm --cached -r .humanize-* + +2. Keep only real project files staged. +3. Retry the stop action after the local state is no longer tracked. + +## Important + +- Do NOT use `git add -f` on Humanize state files. +- Do NOT commit RLCR trackers, round summaries, contracts, or cancel/finalize markers. 
diff --git a/tests/test-humanize-escape.sh b/tests/test-humanize-escape.sh index 644a349f..a46578d5 100755 --- a/tests/test-humanize-escape.sh +++ b/tests/test-humanize-escape.sh @@ -85,6 +85,9 @@ assert_blocks "git add ./.humanize/" "Block: ./.humanize/ with trailing slash" assert_blocks "git add ./.humanize/file.md" "Block: ./.humanize/file.md" assert_blocks "git add path/to/.humanize" "Block: path/to/.humanize" assert_blocks "git add ../project/.humanize" "Block: ../project/.humanize" +assert_blocks "git add .humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md" "Block: RLCR goal tracker path" +assert_blocks "git add .humanize/rlcr/2026-03-01_00-00-00/round-3-summary.md" "Block: RLCR round summary path" +assert_blocks "git add .humanize/rlcr/2026-03-01_00-00-00/round-3-contract.md" "Block: RLCR round contract path" # ======================================== # Test Group 2: Quoted Path Variants @@ -98,6 +101,7 @@ assert_blocks "git add '.humanize'" "Block: single-quoted .humanize" assert_blocks 'git add "./.humanize"' "Block: double-quoted ./.humanize" assert_blocks "git add './.humanize'" "Block: single-quoted ./.humanize" assert_blocks 'git add "path/to/.humanize"' "Block: double-quoted path/to/.humanize" +assert_blocks 'git add ".humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md"' "Block: double-quoted RLCR file path" # ======================================== # Test Group 3: Combined Force and Path Variants @@ -109,6 +113,7 @@ echo "" assert_blocks "git add -f ./.humanize" "Block: -f with ./.humanize" assert_blocks "git add --force ./.humanize" "Block: --force with ./.humanize" assert_blocks 'git add -f ".humanize"' "Block: -f with quoted .humanize" +assert_blocks "git add -f .humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md" "Block: -f with RLCR goal tracker" # Force flag with broad scope (blocks gitignore bypass) assert_blocks "git add -f ." "Block: -f . 
(force with current dir)" diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 82b92132..1c927b08 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -115,30 +115,59 @@ else fail "rlcr-stop-gate --project-root output contains expected block reason" "output containing BLOCK:" "$OUTPUT2" fi -# Test 3: No active loop -> gate allows exit (exit 0) +# Test 3: Tracked Humanize state blocks before normal loop validation T3_DIR="$TEST_DIR/t3" -mkdir -p "$T3_DIR/empty-project" +mkdir -p "$T3_DIR" +setup_active_loop_fixture "$T3_DIR/project" +echo "tracked" > "$T3_DIR/project/.humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md" +git -C "$T3_DIR/project" add -f .humanize/rlcr/2026-03-01_00-00-00/goal-tracker.md set +e ( - cd "$T3_DIR/empty-project" + cd "$T3_DIR/project" "$GATE_SCRIPT" ) > "$T3_DIR/out.txt" 2>&1 EXIT3=$? set -e -if [[ "$EXIT3" -eq 0 ]]; then - pass "rlcr-stop-gate exits 0 when no active loop exists" +if [[ "$EXIT3" -eq 10 ]]; then + pass "rlcr-stop-gate blocks tracked Humanize state" else OUTPUT3=$(cat "$T3_DIR/out.txt" 2>/dev/null || true) - fail "rlcr-stop-gate exits 0 when no active loop exists" "exit 0" "exit $EXIT3; output: $OUTPUT3" + fail "rlcr-stop-gate blocks tracked Humanize state" "exit 10" "exit $EXIT3; output: $OUTPUT3" fi -if grep -q "^ALLOW:" "$T3_DIR/out.txt" 2>/dev/null; then - pass "rlcr-stop-gate reports ALLOW when no active loop" +if grep -q "Tracked Humanize State Blocked" "$T3_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate reports tracked Humanize state with dedicated reason" else OUTPUT3=$(cat "$T3_DIR/out.txt" 2>/dev/null || true) - fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT3" + fail "rlcr-stop-gate reports tracked Humanize state with dedicated reason" "output containing Tracked Humanize State Blocked" "$OUTPUT3" +fi + +# Test 4: No active loop -> gate allows exit (exit 0) +T4_DIR="$TEST_DIR/t4" +mkdir -p "$T4_DIR/empty-project" + +set +e +( + cd 
"$T4_DIR/empty-project" + "$GATE_SCRIPT" +) > "$T4_DIR/out.txt" 2>&1 +EXIT4=$? +set -e + +if [[ "$EXIT4" -eq 0 ]]; then + pass "rlcr-stop-gate exits 0 when no active loop exists" +else + OUTPUT4=$(cat "$T4_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate exits 0 when no active loop exists" "exit 0" "exit $EXIT4; output: $OUTPUT4" +fi + +if grep -q "^ALLOW:" "$T4_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate reports ALLOW when no active loop" +else + OUTPUT4=$(cat "$T4_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT4" fi print_test_summary "RLCR Stop Gate Wrapper Test Summary" From 421ae97fd2aa790d11ccbd506bb6c4a1dc2c1678 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 09:53:36 -0700 Subject: [PATCH 55/97] Narrow tracked-humanize guard to .humanize/ to avoid false blocks The tracked-state check used '.humanize-*' globs which matched unrelated dot-prefixed files the command-side guard already allows (for example .humanize-backup and .humanizeconfig, per tests/test-humanize-escape.sh). In an active loop, a repo that intentionally tracked such a file was incorrectly blocked with "Tracked Humanize State Blocked" even with no RLCR artifacts present. Scope the detection to .humanize/ via a single ls-files pathspec. This stays consistent with git_adds_humanize, correctly omits paths the user has staged for removal via git rm --cached, and drops the redundant diff --cached probe. Update the block message (inline fallback and prompt template) to drop the dangerous "git rm --cached -r .humanize-*" hint, which could strip user files like .humanize-backup from the index. Add a stop-gate regression test asserting that tracked .humanize-backup and .humanizeconfig do not trigger the tracked-state block reason. 
--- hooks/lib/loop-common.sh | 24 ++++----- prompt-template/block/git-tracked-humanize.md | 8 +-- tests/test-stop-gate.sh | 51 +++++++++++++++---- 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index f6637a05..8d013b71 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1322,6 +1322,13 @@ IMPORTANT: The commit message must NOT contain the literal string \".humanize\" # Return success if local Humanize runtime state has entered git tracking or the index. # Untracked .humanize state is allowed; tracked or staged state must be blocked. # Usage: git_has_tracked_humanize_state [project_root] +# +# Intentionally scoped to .humanize/ to stay consistent with git_adds_humanize, +# which explicitly allows unrelated paths like .humanize-backup or +# .humanizeconfig (see tests/test-humanize-escape.sh). ls-files covers both +# committed entries and paths staged via git add; paths the user has staged for +# removal via git rm --cached are correctly omitted so the user can unstick +# themselves without being re-blocked. git_has_tracked_humanize_state() { local project_root="${1:-.}" @@ -1329,14 +1336,7 @@ git_has_tracked_humanize_state() { return 1 fi - if git -C "$project_root" ls-files --error-unmatch .humanize >/dev/null 2>&1; then - return 0 - fi - if git -C "$project_root" ls-files '.humanize/*' '.humanize-*' | grep -q '.'; then - return 0 - fi - - if git -C "$project_root" diff --cached --name-only -- .humanize '.humanize-*' | grep -q '.'; then + if git -C "$project_root" ls-files -- .humanize 2>/dev/null | grep -q '.'; then return 0 fi @@ -1348,20 +1348,16 @@ git_has_tracked_humanize_state() { git_tracked_humanize_blocked_message() { local fallback="# Tracked Humanize State Blocked -Detected tracked or staged files under \`.humanize/\` (or legacy \`.humanize-*\`). +Detected tracked or staged files under \`.humanize/\`. 
These files are local Humanize loop state and must remain outside version control. ## Required Fix -1. Remove Humanize state from the index, for example: +1. Remove Humanize state from the index: git rm --cached -r .humanize - If legacy tracked state exists, remove those entries too: - - git rm --cached -r .humanize-* - 2. Keep only real project files staged. 3. Retry the stop action after the local state is no longer tracked. diff --git a/prompt-template/block/git-tracked-humanize.md b/prompt-template/block/git-tracked-humanize.md index afb53ecc..3a7b36df 100644 --- a/prompt-template/block/git-tracked-humanize.md +++ b/prompt-template/block/git-tracked-humanize.md @@ -1,19 +1,15 @@ # Tracked Humanize State Blocked -Detected tracked or staged files under `.humanize/` or legacy `.humanize-*`. +Detected tracked or staged files under `.humanize/`. These files are local Humanize loop state and must remain outside version control. ## Required Fix -1. Remove Humanize state from the index, for example: +1. Remove Humanize state from the index: git rm --cached -r .humanize - If legacy tracked state exists, remove those entries too: - - git rm --cached -r .humanize-* - 2. Keep only real project files staged. 3. Retry the stop action after the local state is no longer tracked. diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 1c927b08..32f2c3ac 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -144,30 +144,63 @@ else fail "rlcr-stop-gate reports tracked Humanize state with dedicated reason" "output containing Tracked Humanize State Blocked" "$OUTPUT3" fi -# Test 4: No active loop -> gate allows exit (exit 0) +# Test 4: Unrelated dot-prefixed files that happen to start with .humanize- +# must not be treated as loop state. .humanize-backup and .humanizeconfig are +# explicitly allowed by the git add validator (tests/test-humanize-escape.sh); +# the tracked-state guard must stay consistent and ignore them. 
T4_DIR="$TEST_DIR/t4" -mkdir -p "$T4_DIR/empty-project" +mkdir -p "$T4_DIR" +setup_active_loop_fixture "$T4_DIR/project" +echo "not loop state" > "$T4_DIR/project/.humanize-backup" +echo "not loop state" > "$T4_DIR/project/.humanizeconfig" +git -C "$T4_DIR/project" add -f .humanize-backup .humanizeconfig set +e ( - cd "$T4_DIR/empty-project" + cd "$T4_DIR/project" "$GATE_SCRIPT" ) > "$T4_DIR/out.txt" 2>&1 EXIT4=$? set -e -if [[ "$EXIT4" -eq 0 ]]; then - pass "rlcr-stop-gate exits 0 when no active loop exists" +if [[ "$EXIT4" -eq 10 ]]; then + pass "rlcr-stop-gate does not confuse .humanize-backup with loop state" else OUTPUT4=$(cat "$T4_DIR/out.txt" 2>/dev/null || true) - fail "rlcr-stop-gate exits 0 when no active loop exists" "exit 0" "exit $EXIT4; output: $OUTPUT4" + fail "rlcr-stop-gate does not confuse .humanize-backup with loop state" "exit 10" "exit $EXIT4; output: $OUTPUT4" fi -if grep -q "^ALLOW:" "$T4_DIR/out.txt" 2>/dev/null; then - pass "rlcr-stop-gate reports ALLOW when no active loop" +if ! grep -q "Tracked Humanize State Blocked" "$T4_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate does not emit tracked-state reason for .humanize-backup" else OUTPUT4=$(cat "$T4_DIR/out.txt" 2>/dev/null || true) - fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT4" + fail "rlcr-stop-gate does not emit tracked-state reason for .humanize-backup" "no Tracked Humanize State Blocked line" "$OUTPUT4" +fi + +# Test 5: No active loop -> gate allows exit (exit 0) +T5_DIR="$TEST_DIR/t5" +mkdir -p "$T5_DIR/empty-project" + +set +e +( + cd "$T5_DIR/empty-project" + "$GATE_SCRIPT" +) > "$T5_DIR/out.txt" 2>&1 +EXIT5=$? 
+set -e + +if [[ "$EXIT5" -eq 0 ]]; then + pass "rlcr-stop-gate exits 0 when no active loop exists" +else + OUTPUT5=$(cat "$T5_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate exits 0 when no active loop exists" "exit 0" "exit $EXIT5; output: $OUTPUT5" +fi + +if grep -q "^ALLOW:" "$T5_DIR/out.txt" 2>/dev/null; then + pass "rlcr-stop-gate reports ALLOW when no active loop" +else + OUTPUT5=$(cat "$T5_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT5" fi print_test_summary "RLCR Stop Gate Wrapper Test Summary" From 9730744d124ad0ab0ae9ef5877ddddc86fb176fa Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 13:35:00 -0700 Subject: [PATCH 56/97] Allow natural stop when background tasks still running Add a background-task short-circuit to the RLCR stop hook so it exits 0 with a user-facing systemMessage whenever the Claude Code transcript shows at least one pending background dispatch (Agent run_in_background, or Bash run_in_background) that has not yet produced a completion queue-operation notification. The short-circuit precedes every other gate (phase detection, state parse, branch consistency, plan integrity, git cleanliness, summary, BitLesson, max iterations, Codex review) and touches no on-disk state; when the background work finishes, the next natural stop re-enters the normal review flow. 
Supporting helpers in loop-common.sh: - extract_transcript_path mirrors extract_session_id - list_pending_background_task_ids parses the transcript jsonl and diffs launched ids against queue-operation completion task-ids - has_pending_background_tasks + count_pending_background_tasks wrap that list with fail-closed semantics when the transcript path is missing or jq is unavailable The rlcr-stop-gate wrapper had two latent bugs newly exposed by forwarding transcript_path end-to-end: select(length>0) on an object field collapsed the entire JSON when session_id was empty, and a wrapped response with no decision field was treated as Unexpected instead of ALLOW. Both paths are fixed alongside this change. tests/test-stop-hook-bg-allow.sh covers AC-1..AC-9 end-to-end (11 cases including two missing-path variants and one wrapper smoke test) and is registered in run-all-tests.sh. Full suite: 1690 passed, 0 failed. Branch targets dev; version stays at 1.16.0 as requested. --- hooks/lib/loop-common.sh | 96 +++++++ hooks/loop-codex-stop-hook.sh | 24 ++ scripts/rlcr-stop-gate.sh | 30 +- tests/run-all-tests.sh | 1 + tests/test-stop-hook-bg-allow.sh | 467 +++++++++++++++++++++++++++++++ 5 files changed, 613 insertions(+), 5 deletions(-) create mode 100755 tests/test-stop-hook-bg-allow.sh diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 8d013b71..a2119511 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -242,6 +242,102 @@ extract_session_id() { printf '%s' "$input" | jq -r '.session_id // empty' 2>/dev/null || echo "" } +# Extract transcript_path from hook JSON input +# Usage: extract_transcript_path "$json_input" +# Outputs the transcript_path to stdout, or empty string if not available +extract_transcript_path() { + local input="$1" + printf '%s' "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "" +} + +# Enumerate background-task ids that have been launched but not yet marked +# completed in a Claude Code 
transcript.jsonl. +# +# Launch events (inspected in tool_result "user" messages): +# - Background subagent: toolUseResult.isAsync == true +# -> id is toolUseResult.agentId +# - Background shell: toolUseResult.backgroundTaskId non-empty +# -> id is toolUseResult.backgroundTaskId +# +# Completion events (inspected in "queue-operation" messages with +# operation == "enqueue" whose content contains a <task-notification> +# XML block): any <task-id>...</task-id> value is treated as terminal +# regardless of the reported <status> (completed, failed, killed, +# cancelled, interrupted, ...). +# +# pending := launched \ completed +# +# Usage: list_pending_background_task_ids "$transcript_path" +# - Outputs one id per line on stdout (possibly empty). +# - Returns 0 when the transcript is readable (including when there are +# no pending tasks). Returns 1 when the transcript path is empty, not +# a regular file, or jq is unavailable, so callers must treat non-zero +# as "unknown -> do not short-circuit". +list_pending_background_task_ids() { + local transcript_path="$1" + + if [[ -z "$transcript_path" ]] || [[ ! -f "$transcript_path" ]]; then + return 1 + fi + if ! command -v jq >/dev/null 2>&1; then + return 1 + fi + + local launched completed + launched=$(jq -r ' + select(.toolUseResult != null) + | select( + (.toolUseResult.isAsync == true and (.toolUseResult.agentId // "") != "") + or ((.toolUseResult.backgroundTaskId // "") != "") + ) + | (.toolUseResult.agentId // .toolUseResult.backgroundTaskId) + ' "$transcript_path" 2>/dev/null | sort -u) || return 1 + + completed=$(jq -r ' + select(.type == "queue-operation" and .operation == "enqueue") + | (.content // "" | tostring) + | select(contains("<task-notification>")) + ' "$transcript_path" 2>/dev/null \ + | grep -oE '<task-id>[^<]+</task-id>' \ + | sed -E 's|</?task-id>||g' \ + | sort -u) || completed="" + + # Emit launched ids that have no matching completion notification. 
+ comm -23 \ + <(printf '%s\n' "$launched" | sed '/^$/d') \ + <(printf '%s\n' "$completed" | sed '/^$/d') +} + +# Returns 0 when the transcript shows at least one pending background task. +# Returns 1 when no pending tasks are detected (including fail-closed cases +# like missing transcript, non-file path, or jq unavailable). +# +# Usage: has_pending_background_tasks "$transcript_path" +has_pending_background_tasks() { + local transcript_path="$1" + local pending + pending=$(list_pending_background_task_ids "$transcript_path" 2>/dev/null) || return 1 + [[ -n "$pending" ]] +} + +# Prints the count of pending background tasks to stdout. Prints 0 for any +# error case so callers can still format messages safely. +# +# Usage: count_pending_background_tasks "$transcript_path" +count_pending_background_tasks() { + local transcript_path="$1" + local pending + pending=$(list_pending_background_task_ids "$transcript_path" 2>/dev/null) || { + echo 0 + return 0 + } + if [[ -z "$pending" ]]; then + echo 0 + else + printf '%s\n' "$pending" | sed '/^$/d' | wc -l | tr -d ' ' + fi +} + # Resolve the active state file for a loop directory # Checks for finalize-state.md first, then state.md # Usage: resolve_active_state_file "$loop_dir" diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 570a4867..5b57a3cb 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -68,6 +68,30 @@ if [[ -z "$LOOP_DIR" ]]; then exit 0 fi +# ======================================== +# Early Exit: Pending Background Tasks +# ======================================== +# When the main Claude Code session has dispatched background work (Agent with +# run_in_background=true, or Bash with run_in_background=true) whose +# completion notifications have not yet arrived, the natural "stop" is simply +# "I am waiting for the background task". Running git/summary/BitLesson/Codex +# gates in that state wastes Codex tokens and produces low-signal reviews. 
+# +# Allow the stop (exit 0) and emit a user-visible systemMessage so nobody +# mistakes the pause for loop completion. The on-disk loop state is left +# untouched -- the next natural stop (after background work finishes) will +# re-enter this hook with no pending tasks and run the normal flow. +# +# This check MUST run before any other gate (phase detection, state parsing, +# branch / plan / git-clean / summary / max-iter checks, Codex review). +HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") +if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH"; then + PENDING_BG_COUNT=$(count_pending_background_tasks "$HOOK_TRANSCRIPT_PATH") + jq -n --arg count "$PENDING_BG_COUNT" \ + '{systemMessage: ("RLCR loop active. " + $count + " background task(s) still running - stop allowed naturally; loop has NOT terminated and will resume on completion.")}' + exit 0 +fi + # ======================================== # Detect Loop Phase: Normal or Finalize # ======================================== diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 09166100..1e928892 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -83,9 +83,15 @@ if ! command -v jq >/dev/null 2>&1; then exit 20 fi -# Build hook input JSON while omitting empty fields. -# Include standard Stop hook fields so the underlying hook sees the same schema -# as a real Claude Code Stop event (hook_event_name, stop_hook_active, cwd). +# Build hook input JSON. Include standard Stop hook fields so the underlying +# hook sees the same schema as a real Claude Code Stop event +# (hook_event_name, stop_hook_active, cwd). +# +# Empty session_id / transcript_path become explicit null instead of being +# filtered out; a `select(length > 0)` used as a plain object value collapses +# the entire enclosing object to empty whenever any selected field is empty, +# which would hide forwarded fields like transcript_path when only session_id +# is missing. 
HOOK_INPUT=$(jq -n \ --arg session_id "$SESSION_ID" \ --arg transcript_path "$TRANSCRIPT_PATH" \ @@ -99,8 +105,8 @@ HOOK_INPUT=$(jq -n \ model: $model, permission_mode: $permission_mode, last_assistant_message: null, - session_id: ($session_id | select(length > 0)), - transcript_path: ($transcript_path | select(length > 0)) + session_id: (if ($session_id | length) > 0 then $session_id else null end), + transcript_path: (if ($transcript_path | length) > 0 then $transcript_path else null end) }') # Capture hook exit code explicitly to map non-zero to exit 20 (wrapper error) @@ -140,6 +146,20 @@ if [[ "$DECISION" == "block" ]]; then exit 10 fi +# No decision field in the JSON: per Claude Code Stop-hook spec this means +# allow the stop. Surface any systemMessage so callers see the reason +# (e.g. "background task(s) still running"), then exit 0. +if [[ -z "$DECISION" ]]; then + if [[ "$PRINT_JSON" == "true" ]]; then + printf '%s\n' "$HOOK_OUTPUT" + elif [[ -n "$SYSTEM_MESSAGE" ]]; then + printf 'ALLOW: %s\n' "$SYSTEM_MESSAGE" + else + echo "ALLOW: stop gate passed." + fi + exit 0 +fi + echo "Error: Unexpected hook decision: ${DECISION:-}" >&2 printf '%s\n' "$HOOK_OUTPUT" >&2 exit 20 diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index b6ba6b24..a39d9ab1 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -68,6 +68,7 @@ TEST_SUITES=( "test-templates-comprehensive.sh" "test-plan-file-hooks.sh" "test-stop-hook-legacy-compat.sh" + "test-stop-hook-bg-allow.sh" "test-error-scenarios.sh" "test-ansi-parsing.sh" "test-allowlist-validators.sh" diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh new file mode 100755 index 00000000..c1e89853 --- /dev/null +++ b/tests/test-stop-hook-bg-allow.sh @@ -0,0 +1,467 @@ +#!/usr/bin/env bash +# +# Tests for the background-task short-circuit in loop-codex-stop-hook.sh. 
+# +# When the current Claude Code session has dispatched background work that has +# not yet completed (via Agent run_in_background=true or Bash +# run_in_background=true), the RLCR stop hook must exit 0 with a user-facing +# systemMessage instead of running any gate or Codex review. The on-disk loop +# state must remain unchanged, so that the next natural stop (after the +# background task finishes) re-enters the normal review flow. +# +# Acceptance criteria exercised here (see +# .humanize/rlcr/2026-04-16_13-19-26/goal-tracker.md for authoritative list): +# AC-1 no bg dispatches -> normal Codex flow +# AC-2 pending subagent -> exit 0 + systemMessage +# AC-3 pending shell -> exit 0 + systemMessage +# AC-4 subagent launch + complete -> normal Codex flow +# AC-5 2 subagents + 1 shell -> systemMessage mentions "3 background" +# AC-6 missing transcript path -> normal Codex flow (fail-closed) +# AC-7 no active loop -> exit 0, no systemMessage, no Codex +# AC-8 finalize phase pending bg -> exit 0 + systemMessage +# AC-9 via rlcr-stop-gate.sh -> exit 0 (wrapper ALLOW) +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" +GATE_SCRIPT="$PROJECT_ROOT/scripts/rlcr-stop-gate.sh" + +setup_test_dir + +export XDG_CACHE_HOME="$TEST_DIR/.cache" +mkdir -p "$XDG_CACHE_HOME" + +# ---------------------------------------------------------------------- +# Mock codex CLI: records an invocation marker and prints canned feedback. 
+# ---------------------------------------------------------------------- +setup_mock_codex() { + mkdir -p "$TEST_DIR/bin" + cat > "$TEST_DIR/bin/codex" << 'EOF' +#!/usr/bin/env bash +if [[ -n "${MOCK_CODEX_MARKER:-}" ]]; then + : > "$MOCK_CODEX_MARKER" +fi +printf '%s\n' "${MOCK_CODEX_OUTPUT:-Mock review feedback}" +exit 0 +EOF + chmod +x "$TEST_DIR/bin/codex" + export PATH="$TEST_DIR/bin:$PATH" +} + +# ---------------------------------------------------------------------- +# Build a minimal "active loop" project that satisfies every gate the +# stop hook enforces BEFORE it calls Codex (so tests that want to reach +# the Codex review flow can pass cleanly when bg-pending is not expected). +# ---------------------------------------------------------------------- +create_full_fixture() { + local repo_dir="$1" + local finalize_phase="${2:-false}" + + init_test_git_repo "$repo_dir" + + printf 'plans/\n' > "$repo_dir/.gitignore" + git -C "$repo_dir" add .gitignore + git -C "$repo_dir" commit -q -m "Add test gitignore" + + mkdir -p "$repo_dir/plans" + cat > "$repo_dir/plans/test-plan.md" << 'EOF' +# Test Plan + +Exercise the background-task short-circuit. 
+EOF + + local branch base_commit loop_dir + branch=$(git -C "$repo_dir" rev-parse --abbrev-ref HEAD) + base_commit=$(git -C "$repo_dir" rev-parse HEAD) + loop_dir="$repo_dir/.humanize/rlcr/2026-03-01_00-00-00" + mkdir -p "$loop_dir" + + cp "$repo_dir/plans/test-plan.md" "$loop_dir/plan.md" + + local state_name="state.md" + if [[ "$finalize_phase" == "true" ]]; then + state_name="finalize-state.md" + fi + + cat > "$loop_dir/$state_name" << EOF +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.4 +codex_effort: high +codex_timeout: 60 +push_every_round: false +full_review_round: 5 +plan_file: "plans/test-plan.md" +plan_tracked: false +start_branch: $branch +base_branch: $branch +base_commit: $base_commit +review_started: false +ask_codex_question: false +agent_teams: false +--- +EOF + + local summary_name="round-0-summary.md" + if [[ "$finalize_phase" == "true" ]]; then + summary_name="finalize-summary.md" + fi + cat > "$loop_dir/$summary_name" << 'EOF' +# Summary + +Exercised the background-task short-circuit. +EOF + + cat > "$loop_dir/goal-tracker.md" << 'EOF' +# Goal Tracker +## IMMUTABLE SECTION +### Ultimate Goal +Exercise background-task short-circuit. +### Acceptance Criteria +- AC-1: Hook reaches Codex review when no bg tasks are pending. +## MUTABLE SECTION +### Plan Version: 1 (Updated: Round 0) +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| Exercise stop hook | AC-1 | completed | - | +EOF + + # Echo the loop dir so callers can reach state artifacts. + echo "$loop_dir" +} + +# A project with no RLCR state file at all. +create_empty_project() { + local repo_dir="$1" + init_test_git_repo "$repo_dir" +} + +# ---------------------------------------------------------------------- +# Transcript fixture builders. +# Each prints a JSONL transcript to stdout. 
+# ---------------------------------------------------------------------- +emit_tool_use_assistant() { + local tool_use_id="$1" tool_name="$2" extra_input_json="$3" + local input_json="{\"run_in_background\":true${extra_input_json}}" + jq -c -n \ + --arg id "$tool_use_id" \ + --arg name "$tool_name" \ + --argjson input "$input_json" \ + '{ + type:"assistant", + message:{ + role:"assistant", + content:[ + {type:"tool_use", id:$id, name:$name, input:$input} + ] + } + }' +} + +emit_async_agent_launch_result() { + local tool_use_id="$1" agent_id="$2" + jq -c -n \ + --arg id "$tool_use_id" \ + --arg aid "$agent_id" \ + '{ + type:"user", + message:{ + role:"user", + content:[{tool_use_id:$id, type:"tool_result", + content:[{type:"text", text:"Async agent launched"}]}] + }, + toolUseResult:{isAsync:true, status:"async_launched", agentId:$aid} + }' +} + +emit_bg_shell_launch_result() { + local tool_use_id="$1" bg_task_id="$2" + jq -c -n \ + --arg id "$tool_use_id" \ + --arg bid "$bg_task_id" \ + '{ + type:"user", + message:{ + role:"user", + content:[{tool_use_id:$id, type:"tool_result", + content:[{type:"text", text:"Shell started in background"}]}] + }, + toolUseResult:{backgroundTaskId:$bid} + }' +} + +emit_task_completion_event() { + local task_id="$1" tool_use_id="$2" status="${3:-completed}" + local notif + notif=$(printf '\n%s\n%s\n%s\n' \ + "$task_id" "$tool_use_id" "$status") + jq -c -n --arg content "$notif" \ + '{type:"queue-operation", operation:"enqueue", content:$content}' +} + +write_transcript() { + local path="$1" + shift + : > "$path" + for line in "$@"; do + printf '%s\n' "$line" >> "$path" + done +} + +# ---------------------------------------------------------------------- +# Invoke the stop hook with a crafted hook input JSON. +# Sets RUN_EXIT_CODE, RUN_OUTPUT, RUN_MARKER. 
+# ---------------------------------------------------------------------- +run_stop_hook_with_input() { + local repo_dir="$1" hook_input_json="$2" + + RUN_MARKER="$repo_dir/codex-called.marker" + rm -f "$RUN_MARKER" + + set +e + RUN_OUTPUT=$( + cd "$repo_dir" + CLAUDE_PROJECT_DIR="$repo_dir" \ + MOCK_CODEX_MARKER="$RUN_MARKER" \ + MOCK_CODEX_OUTPUT="Mock review feedback" \ + "$STOP_HOOK" <<<"$hook_input_json" 2>&1 + ) + RUN_EXIT_CODE=$? + set -e +} + +assert_systemmessage_only() { + local test_name="$1" repo_dir="$2" state_file="$3" expected_count_regex="$4" + + local before_hash after_hash + before_hash=$(sha256sum "$state_file" 2>/dev/null | awk '{print $1}') + + if [[ "$RUN_EXIT_CODE" -ne 0 ]]; then + fail "$test_name" "exit 0 with systemMessage" \ + "exit $RUN_EXIT_CODE; output: $RUN_OUTPUT" + return + fi + if [[ -f "$RUN_MARKER" ]]; then + fail "$test_name" "Codex NOT invoked" \ + "marker present (Codex was called); output: $RUN_OUTPUT" + return + fi + local system_message + system_message=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") + if [[ -z "$system_message" ]]; then + fail "$test_name" "JSON output with systemMessage" \ + "no systemMessage in output: $RUN_OUTPUT" + return + fi + if [[ -n "$expected_count_regex" ]]; then + if ! 
printf '%s' "$system_message" | grep -Eq "$expected_count_regex"; then + fail "$test_name" \ + "systemMessage matches /$expected_count_regex/" \ + "got: $system_message" + return + fi + fi + after_hash=$(sha256sum "$state_file" 2>/dev/null | awk '{print $1}') + if [[ "$before_hash" != "$after_hash" ]]; then + fail "$test_name" "state file unchanged" \ + "hash changed ($before_hash -> $after_hash)" + return + fi + pass "$test_name" +} + +assert_reached_codex() { + local test_name="$1" + if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -f "$RUN_MARKER" ]]; then + pass "$test_name" + else + fail "$test_name" "exit 0 and Codex invoked (marker present)" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$RUN_MARKER" && echo present || echo missing); output: $RUN_OUTPUT" + fi +} + +setup_mock_codex + +# Transcripts live outside any test repo to avoid tripping git cleanliness +# gates in the stop hook. +TRANSCRIPTS_DIR="$TEST_DIR/transcripts" +mkdir -p "$TRANSCRIPTS_DIR" + +echo "==========================================" +echo "Stop Hook Background-Task Allow Tests" +echo "==========================================" +echo "" + +# ---------------- AC-1 ---------------- +echo "Test AC-1: No bg dispatches -> reaches Codex" +AC1_REPO="$TEST_DIR/ac1" +create_full_fixture "$AC1_REPO" > /dev/null +AC1_TRANSCRIPT="$TRANSCRIPTS_DIR/ac1.jsonl" +write_transcript "$AC1_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' + +AC1_INPUT=$(jq -c -n --arg tp "$AC1_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC1_REPO" "$AC1_INPUT" +assert_reached_codex "AC-1: transcript without bg dispatches proceeds to Codex review" + +# ---------------- AC-2 ---------------- +echo "Test AC-2: One pending background subagent -> exit 0 + systemMessage" +AC2_REPO="$TEST_DIR/ac2" +AC2_LOOP=$(create_full_fixture "$AC2_REPO") +AC2_STATE="$AC2_LOOP/state.md" +AC2_TRANSCRIPT="$TRANSCRIPTS_DIR/ac2.jsonl" +AC2_LINE_LAUNCH=$(emit_tool_use_assistant "toolu_A" "Agent" 
',"description":"x","prompt":"x"') +AC2_LINE_RESULT=$(emit_async_agent_launch_result "toolu_A" "agent_pending_A") +write_transcript "$AC2_TRANSCRIPT" "$AC2_LINE_LAUNCH" "$AC2_LINE_RESULT" + +AC2_INPUT=$(jq -c -n --arg tp "$AC2_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC2_REPO" "$AC2_INPUT" +assert_systemmessage_only \ + "AC-2: pending subagent triggers exit 0 + systemMessage, state untouched" \ + "$AC2_REPO" "$AC2_STATE" "1 background task" + +# ---------------- AC-3 ---------------- +echo "Test AC-3: One pending background shell -> exit 0 + systemMessage" +AC3_REPO="$TEST_DIR/ac3" +AC3_LOOP=$(create_full_fixture "$AC3_REPO") +AC3_STATE="$AC3_LOOP/state.md" +AC3_TRANSCRIPT="$TRANSCRIPTS_DIR/ac3.jsonl" +AC3_LINE_LAUNCH=$(emit_tool_use_assistant "toolu_B" "Bash" ',"command":"sleep 30"') +AC3_LINE_RESULT=$(emit_bg_shell_launch_result "toolu_B" "shell_pending_B") +write_transcript "$AC3_TRANSCRIPT" "$AC3_LINE_LAUNCH" "$AC3_LINE_RESULT" + +AC3_INPUT=$(jq -c -n --arg tp "$AC3_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC3_REPO" "$AC3_INPUT" +assert_systemmessage_only \ + "AC-3: pending background shell triggers exit 0 + systemMessage" \ + "$AC3_REPO" "$AC3_STATE" "1 background task" + +# ---------------- AC-4 ---------------- +echo "Test AC-4: Launched subagent with completion notification -> reaches Codex" +AC4_REPO="$TEST_DIR/ac4" +create_full_fixture "$AC4_REPO" > /dev/null +AC4_TRANSCRIPT="$TRANSCRIPTS_DIR/ac4.jsonl" +AC4_LAUNCH=$(emit_tool_use_assistant "toolu_C" "Agent" ',"description":"x","prompt":"x"') +AC4_RESULT=$(emit_async_agent_launch_result "toolu_C" "agent_done_C") +AC4_COMPLETE=$(emit_task_completion_event "agent_done_C" "toolu_C" "completed") +write_transcript "$AC4_TRANSCRIPT" "$AC4_LAUNCH" "$AC4_RESULT" "$AC4_COMPLETE" + +AC4_INPUT=$(jq -c -n --arg tp "$AC4_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC4_REPO" "$AC4_INPUT" +assert_reached_codex "AC-4: subagent with matching completion 
notification proceeds to Codex review" + +# ---------------- AC-5 ---------------- +echo "Test AC-5: 2 pending subagents + 1 pending shell -> systemMessage mentions 3" +AC5_REPO="$TEST_DIR/ac5" +AC5_LOOP=$(create_full_fixture "$AC5_REPO") +AC5_STATE="$AC5_LOOP/state.md" +AC5_TRANSCRIPT="$TRANSCRIPTS_DIR/ac5.jsonl" +AC5_L1_LAUNCH=$(emit_tool_use_assistant "toolu_D1" "Agent" ',"description":"x","prompt":"x"') +AC5_L1_RESULT=$(emit_async_agent_launch_result "toolu_D1" "agent_pending_D1") +AC5_L2_LAUNCH=$(emit_tool_use_assistant "toolu_D2" "Agent" ',"description":"y","prompt":"y"') +AC5_L2_RESULT=$(emit_async_agent_launch_result "toolu_D2" "agent_pending_D2") +AC5_L3_LAUNCH=$(emit_tool_use_assistant "toolu_D3" "Bash" ',"command":"sleep 30"') +AC5_L3_RESULT=$(emit_bg_shell_launch_result "toolu_D3" "shell_pending_D3") +write_transcript "$AC5_TRANSCRIPT" \ + "$AC5_L1_LAUNCH" "$AC5_L1_RESULT" \ + "$AC5_L2_LAUNCH" "$AC5_L2_RESULT" \ + "$AC5_L3_LAUNCH" "$AC5_L3_RESULT" + +AC5_INPUT=$(jq -c -n --arg tp "$AC5_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC5_REPO" "$AC5_INPUT" +assert_systemmessage_only \ + "AC-5: 2 pending subagents + 1 pending shell -> systemMessage mentions '3 background task(s)'" \ + "$AC5_REPO" "$AC5_STATE" "3 background task\\(s\\)" + +# ---------------- AC-6 ---------------- +echo "Test AC-6: missing transcript path -> reaches Codex (fail-closed)" +AC6_REPO="$TEST_DIR/ac6" +create_full_fixture "$AC6_REPO" > /dev/null +AC6_INPUT=$(jq -c -n --arg tp "/nonexistent/file-$$.jsonl" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC6_REPO" "$AC6_INPUT" +assert_reached_codex "AC-6: missing transcript_path proceeds to Codex review (fail-closed)" + +# Also: empty transcript_path field +AC6B_REPO="$TEST_DIR/ac6b" +create_full_fixture "$AC6B_REPO" > /dev/null +AC6B_INPUT='{"transcript_path":""}' +run_stop_hook_with_input "$AC6B_REPO" "$AC6B_INPUT" +assert_reached_codex "AC-6b: empty transcript_path string proceeds to Codex review" + +# And: 
no transcript_path key at all +AC6C_REPO="$TEST_DIR/ac6c" +create_full_fixture "$AC6C_REPO" > /dev/null +AC6C_INPUT='{}' +run_stop_hook_with_input "$AC6C_REPO" "$AC6C_INPUT" +assert_reached_codex "AC-6c: hook input with no transcript_path proceeds to Codex review" + +# ---------------- AC-7 ---------------- +echo "Test AC-7: No active loop -> exit 0, no systemMessage, no Codex" +AC7_REPO="$TEST_DIR/ac7" +create_empty_project "$AC7_REPO" +AC7_TRANSCRIPT="$TRANSCRIPTS_DIR/ac7.jsonl" +AC7_LAUNCH=$(emit_tool_use_assistant "toolu_E" "Agent" ',"description":"x","prompt":"x"') +AC7_RESULT=$(emit_async_agent_launch_result "toolu_E" "agent_pending_E") +write_transcript "$AC7_TRANSCRIPT" "$AC7_LAUNCH" "$AC7_RESULT" +AC7_INPUT=$(jq -c -n --arg tp "$AC7_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC7_REPO" "$AC7_INPUT" + +AC7_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! -f "$RUN_MARKER" ]] && [[ -z "$AC7_SYS_MSG" ]]; then + pass "AC-7: no active loop takes original exit-0 path without systemMessage" +else + fail "AC-7: no active loop takes original exit-0 path without systemMessage" \ + "exit 0, no Codex marker, no systemMessage" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$RUN_MARKER" && echo present || echo missing), systemMessage='$AC7_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# ---------------- AC-8 ---------------- +echo "Test AC-8: Finalize phase + pending bg -> exit 0 + systemMessage" +AC8_REPO="$TEST_DIR/ac8" +AC8_LOOP=$(create_full_fixture "$AC8_REPO" true) +AC8_STATE="$AC8_LOOP/finalize-state.md" +AC8_TRANSCRIPT="$TRANSCRIPTS_DIR/ac8.jsonl" +AC8_LAUNCH=$(emit_tool_use_assistant "toolu_F" "Agent" ',"description":"x","prompt":"x"') +AC8_RESULT=$(emit_async_agent_launch_result "toolu_F" "agent_pending_F") +write_transcript "$AC8_TRANSCRIPT" "$AC8_LAUNCH" "$AC8_RESULT" +AC8_INPUT=$(jq -c -n --arg tp "$AC8_TRANSCRIPT" '{transcript_path:$tp}') 
+run_stop_hook_with_input "$AC8_REPO" "$AC8_INPUT" +assert_systemmessage_only \ + "AC-8: finalize phase with pending bg task -> exit 0 + systemMessage" \ + "$AC8_REPO" "$AC8_STATE" "1 background task" + +# ---------------- AC-9 ---------------- +echo "Test AC-9: rlcr-stop-gate.sh forwards transcript_path to hook" +AC9_REPO="$TEST_DIR/ac9" +create_full_fixture "$AC9_REPO" > /dev/null +AC9_TRANSCRIPT="$TRANSCRIPTS_DIR/ac9.jsonl" +AC9_LAUNCH=$(emit_tool_use_assistant "toolu_G" "Agent" ',"description":"x","prompt":"x"') +AC9_RESULT=$(emit_async_agent_launch_result "toolu_G" "agent_pending_G") +write_transcript "$AC9_TRANSCRIPT" "$AC9_LAUNCH" "$AC9_RESULT" + +AC9_OUT="$AC9_REPO/gate-out.txt" +set +e +( + cd "$AC9_REPO" + "$GATE_SCRIPT" --transcript-path "$AC9_TRANSCRIPT" +) > "$AC9_OUT" 2>&1 +AC9_EXIT=$? +set -e + +if [[ "$AC9_EXIT" -eq 0 ]] && grep -q "^ALLOW:" "$AC9_OUT"; then + pass "AC-9: rlcr-stop-gate.sh exits 0 with ALLOW when bg tasks are pending" +else + AC9_BODY=$(cat "$AC9_OUT" 2>/dev/null || true) + fail "AC-9: rlcr-stop-gate.sh exits 0 with ALLOW when bg tasks are pending" \ + "exit 0 and output containing ALLOW:" \ + "exit $AC9_EXIT; output: $AC9_BODY" +fi + +print_test_summary "Stop Hook Background-Task Allow Test Summary" +exit $? From 3adf8ef8cdbcc6c2d055ebd4e33909fac38b4c6b Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 13:45:48 -0700 Subject: [PATCH 57/97] Expand '~' in transcript_path before background-task file check Round 0 shipped the natural-stop short-circuit using a literal `-f` check on the transcript path, which silently failed for the "~/.claude/projects//.jsonl" form Claude Code actually publishes. A real session landed on the fall-through path and still ran the full Codex review, defeating the feature. Add a small `expand_leading_tilde` helper in loop-common.sh (no eval, handles bare "~" and "~/..." only, leaves everything else verbatim) and apply it in both extract_transcript_path and list_pending_background_task_ids. 
The stop hook and the rlcr-stop-gate wrapper inherit the fix through the shared helpers without any call-site edits. Regression coverage in tests/test-stop-hook-bg-allow.sh: AC-10 hook input with transcript_path="~/...\" under $HOME still triggers the short-circuit (exit 0 + systemMessage). AC-10b direct list_pending_background_task_ids on the same tilde form returns the pending id, so a future regression in the helper cannot be masked by additional normalization in the hook itself. Under-$HOME fixture dir is cleaned alongside $TEST_DIR via an extended EXIT trap. Full suite: 1692 passed, 0 failed (+2 vs Round 0). Branch still targets dev; version stays at 1.16.0. --- hooks/lib/loop-common.sh | 28 +++++++++++++++++-- tests/test-stop-hook-bg-allow.sh | 48 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index a2119511..3b210793 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -242,12 +242,29 @@ extract_session_id() { printf '%s' "$input" | jq -r '.session_id // empty' 2>/dev/null || echo "" } -# Extract transcript_path from hook JSON input +# Expand a leading "~" or "~/" in a path to "$HOME" without using eval. +# Only the bare "~" and "~/..." forms are expanded; "~user/..." and every +# other input (absolute path, relative path, empty string) is returned verbatim. +# +# Usage: expand_leading_tilde "$path" +# Prints the normalized path to stdout. +expand_leading_tilde() { + local path="$1" + case "$path" in + '~') printf '%s' "${HOME:-}" ;; + '~/'*) printf '%s/%s' "${HOME:-}" "${path#'~/'}" ;; + *) printf '%s' "$path" ;; + esac +} + +# Extract transcript_path from hook JSON input and expand any leading tilde. # Usage: extract_transcript_path "$json_input" -# Outputs the transcript_path to stdout, or empty string if not available +# Outputs the transcript_path to stdout, or empty string if not available. 
extract_transcript_path() { local input="$1" - printf '%s' "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "" + local raw + raw=$(printf '%s' "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") + expand_leading_tilde "$raw" } # Enumerate background-task ids that have been launched but not yet marked @@ -276,6 +293,11 @@ extract_transcript_path() { list_pending_background_task_ids() { local transcript_path="$1" + # Normalize a leading tilde so direct callers (tests, ad-hoc scripts) + # work correctly even when transcript_path was not routed through + # extract_transcript_path. + transcript_path=$(expand_leading_tilde "$transcript_path") + if [[ -z "$transcript_path" ]] || [[ ! -f "$transcript_path" ]]; then return 1 fi diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index c1e89853..a8025f75 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -33,6 +33,16 @@ GATE_SCRIPT="$PROJECT_ROOT/scripts/rlcr-stop-gate.sh" setup_test_dir +# AC-10 needs a transcript living under $HOME so the hook input can use +# the "~/..." form. Create the fixture dir in the AC-10 block and extend +# the EXIT trap set by setup_test_dir to clean both directories on shutdown. +HOME_FIXTURE_DIR="" +cleanup_all() { + rm -rf "$TEST_DIR" + [[ -n "$HOME_FIXTURE_DIR" ]] && rm -rf "$HOME_FIXTURE_DIR" +} +trap cleanup_all EXIT + export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" @@ -463,5 +473,43 @@ else "exit $AC9_EXIT; output: $AC9_BODY" fi +# ---------------- AC-10 ---------------- +# Regression: real sessions pass transcript_path as "~/.claude/projects/...". +# Without tilde expansion the file check `[[ -f "~/..." ]]` is always false, +# so the short-circuit silently misses pending background tasks. +echo "Test AC-10: '~/...' 
transcript path still triggers short-circuit" +AC10_REPO="$TEST_DIR/ac10" +AC10_LOOP=$(create_full_fixture "$AC10_REPO") +AC10_STATE="$AC10_LOOP/state.md" + +HOME_FIXTURE_DIR=$(mktemp -d "$HOME/.humanize-bg-allow-test-XXXXXX") +AC10_TRANSCRIPT="$HOME_FIXTURE_DIR/ac10.jsonl" +AC10_LAUNCH=$(emit_tool_use_assistant "toolu_H" "Agent" ',"description":"x","prompt":"x"') +AC10_RESULT=$(emit_async_agent_launch_result "toolu_H" "agent_pending_H") +write_transcript "$AC10_TRANSCRIPT" "$AC10_LAUNCH" "$AC10_RESULT" + +# Build the tilde-form string literally. Do NOT let the shell expand "~". +AC10_TILDE_PATH="~/${AC10_TRANSCRIPT#$HOME/}" +AC10_INPUT=$(jq -c -n --arg tp "$AC10_TILDE_PATH" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC10_REPO" "$AC10_INPUT" +assert_systemmessage_only \ + "AC-10: '~/'-prefixed transcript_path is expanded and short-circuits on pending bg" \ + "$AC10_REPO" "$AC10_STATE" "1 background task" + +# Also prove the helper works directly against a "~/..." argument. +# This avoids masking a helper regression behind the hook's own expansion. +AC10_HELPER_OUT=$( + cd "$AC10_REPO" + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC10_TILDE_PATH" 2>/dev/null | sort -u +) +if printf '%s\n' "$AC10_HELPER_OUT" | grep -qx 'agent_pending_H'; then + pass "AC-10b: list_pending_background_task_ids expands '~/...' directly" +else + fail "AC-10b: list_pending_background_task_ids expands '~/...' directly" \ + "output containing 'agent_pending_H'" "$AC10_HELPER_OUT" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 38691dd2c860472c637519253fc82abe054a7505 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 13:55:27 -0700 Subject: [PATCH 58/97] Make tilde-path regressions portable and cover the wrapper too Two test-harness gaps flagged in the previous Codex review: 1. 
The tilde-path fixture was created with mktemp -d "$HOME/...", which aborts on sandboxed or read-only-HOME environments and forces callers to override HOME externally. 2. AC-9 with the literal "~/..." --transcript-path form was named as a target for the round but never made it into automation; the checked-in wrapper coverage still used an absolute path. Fix both inside tests/test-stop-hook-bg-allow.sh only -- no production code changes. The helper, hook, and wrapper already handle the tilde form correctly; the missing piece was portable, checked-in proof. - Introduce FAKE_HOME="$TEST_DIR/fake-home" rooted inside the existing test temp dir, so cleanup rides on the setup_test_dir EXIT trap. The previous cleanup_all / HOME_FIXTURE_DIR extension is removed. - Extend run_stop_hook_with_input with an optional 3rd argument that exports HOME inside the hook subshell when non-empty. Absent, behavior is verbatim. - Refactor AC-10 and AC-10b to point at "$FAKE_HOME/session-data/ac10.jsonl", and pass the tilde-form literal "~/session-data/ac10.jsonl" verbatim. The previous dynamic derivation "~/${AC10_TRANSCRIPT#$HOME/}" is gone. - Add AC-10c: exercises scripts/rlcr-stop-gate.sh with --transcript-path "~/..." under HOME=$FAKE_HOME and asserts exit 0 + "^ALLOW:" + "background task". Validation: - bash tests/test-stop-hook-bg-allow.sh -> 14 passed, 0 failed (baseline 13; +1 for AC-10c). - bash tests/run-all-tests.sh -> 1693 passed, 0 failed (baseline 1692; +1). - Portability: HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 14 passed, 0 failed. Branch still targets dev; version stays at 1.16.0. 
--- tests/test-stop-hook-bg-allow.sh | 72 +++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index a8025f75..408400a6 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -33,19 +33,18 @@ GATE_SCRIPT="$PROJECT_ROOT/scripts/rlcr-stop-gate.sh" setup_test_dir -# AC-10 needs a transcript living under $HOME so the hook input can use -# the "~/..." form. Create the fixture dir in the AC-10 block and extend -# the EXIT trap set by setup_test_dir to clean both directories on shutdown. -HOME_FIXTURE_DIR="" -cleanup_all() { - rm -rf "$TEST_DIR" - [[ -n "$HOME_FIXTURE_DIR" ]] && rm -rf "$HOME_FIXTURE_DIR" -} -trap cleanup_all EXIT - export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" +# Fake HOME rooted inside $TEST_DIR so the tilde-path regressions (AC-10, +# AC-10b, AC-10c) do not write into the real user home. The hook, helper, +# and wrapper invocations that need tilde expansion run with HOME set to +# this directory; every other invocation keeps the real HOME. Cleanup is +# covered by the setup_test_dir EXIT trap because FAKE_HOME is under +# $TEST_DIR. +FAKE_HOME="$TEST_DIR/fake-home" +mkdir -p "$FAKE_HOME" + # ---------------------------------------------------------------------- # Mock codex CLI: records an invocation marker and prints canned feedback. # ---------------------------------------------------------------------- @@ -226,11 +225,14 @@ write_transcript() { } # ---------------------------------------------------------------------- -# Invoke the stop hook with a crafted hook input JSON. +# Invoke the stop hook with a crafted hook input JSON. The optional third +# argument overrides HOME for the hook invocation only, so tilde-path +# regressions can point at a fake HOME rooted under $TEST_DIR without +# leaking into the real user home. # Sets RUN_EXIT_CODE, RUN_OUTPUT, RUN_MARKER. 
# ---------------------------------------------------------------------- run_stop_hook_with_input() { - local repo_dir="$1" hook_input_json="$2" + local repo_dir="$1" hook_input_json="$2" home_override="${3:-}" RUN_MARKER="$repo_dir/codex-called.marker" rm -f "$RUN_MARKER" @@ -238,6 +240,7 @@ run_stop_hook_with_input() { set +e RUN_OUTPUT=$( cd "$repo_dir" + [[ -n "$home_override" ]] && export HOME="$home_override" CLAUDE_PROJECT_DIR="$repo_dir" \ MOCK_CODEX_MARKER="$RUN_MARKER" \ MOCK_CODEX_OUTPUT="Mock review feedback" \ @@ -473,33 +476,40 @@ else "exit $AC9_EXIT; output: $AC9_BODY" fi -# ---------------- AC-10 ---------------- +# ---------------- AC-10 / AC-10b / AC-10c ---------------- # Regression: real sessions pass transcript_path as "~/.claude/projects/...". # Without tilde expansion the file check `[[ -f "~/..." ]]` is always false, # so the short-circuit silently misses pending background tasks. +# +# The fixture lives under a fake HOME rooted inside $TEST_DIR so the tests +# remain portable on sandboxed or read-only-HOME environments. Only the +# specific hook / helper / wrapper invocations that need tilde expansion +# run with HOME=$FAKE_HOME; the rest of the suite keeps the real HOME. echo "Test AC-10: '~/...' transcript path still triggers short-circuit" AC10_REPO="$TEST_DIR/ac10" AC10_LOOP=$(create_full_fixture "$AC10_REPO") AC10_STATE="$AC10_LOOP/state.md" -HOME_FIXTURE_DIR=$(mktemp -d "$HOME/.humanize-bg-allow-test-XXXXXX") -AC10_TRANSCRIPT="$HOME_FIXTURE_DIR/ac10.jsonl" +mkdir -p "$FAKE_HOME/session-data" +AC10_TRANSCRIPT="$FAKE_HOME/session-data/ac10.jsonl" AC10_LAUNCH=$(emit_tool_use_assistant "toolu_H" "Agent" ',"description":"x","prompt":"x"') AC10_RESULT=$(emit_async_agent_launch_result "toolu_H" "agent_pending_H") write_transcript "$AC10_TRANSCRIPT" "$AC10_LAUNCH" "$AC10_RESULT" # Build the tilde-form string literally. Do NOT let the shell expand "~". 
-AC10_TILDE_PATH="~/${AC10_TRANSCRIPT#$HOME/}" +AC10_TILDE_PATH="~/session-data/ac10.jsonl" AC10_INPUT=$(jq -c -n --arg tp "$AC10_TILDE_PATH" '{transcript_path:$tp}') -run_stop_hook_with_input "$AC10_REPO" "$AC10_INPUT" +run_stop_hook_with_input "$AC10_REPO" "$AC10_INPUT" "$FAKE_HOME" assert_systemmessage_only \ "AC-10: '~/'-prefixed transcript_path is expanded and short-circuits on pending bg" \ "$AC10_REPO" "$AC10_STATE" "1 background task" -# Also prove the helper works directly against a "~/..." argument. -# This avoids masking a helper regression behind the hook's own expansion. +# Also prove the helper works directly against a "~/..." argument under a +# fake HOME. Avoids masking a helper regression behind the hook's own +# normalization. AC10_HELPER_OUT=$( cd "$AC10_REPO" + HOME="$FAKE_HOME" # shellcheck source=/dev/null source "$PROJECT_ROOT/hooks/lib/loop-common.sh" list_pending_background_task_ids "$AC10_TILDE_PATH" 2>/dev/null | sort -u @@ -511,5 +521,29 @@ else "output containing 'agent_pending_H'" "$AC10_HELPER_OUT" fi +# Verify the gate wrapper path with a tilde-form --transcript-path also +# reaches the short-circuit. AC-9 uses an absolute transcript path; this +# covers the same code path with a "~/..." form. +echo "Test AC-10c: rlcr-stop-gate.sh with '~/...' --transcript-path -> ALLOW" +AC10C_OUT="$TEST_DIR/ac10c-out.txt" +set +e +( + cd "$AC10_REPO" + HOME="$FAKE_HOME" "$GATE_SCRIPT" --transcript-path "$AC10_TILDE_PATH" +) > "$AC10C_OUT" 2>&1 +AC10C_EXIT=$? +set -e + +if [[ "$AC10C_EXIT" -eq 0 ]] \ + && grep -q "^ALLOW:" "$AC10C_OUT" \ + && grep -q "background task" "$AC10C_OUT"; then + pass "AC-10c: rlcr-stop-gate.sh expands '~/...' and emits ALLOW with systemMessage" +else + AC10C_BODY=$(cat "$AC10C_OUT" 2>/dev/null || true) + fail "AC-10c: rlcr-stop-gate.sh expands '~/...' 
and emits ALLOW with systemMessage" \ + "exit 0 + output containing ALLOW: and 'background task'" \ + "exit $AC10C_EXIT; output: $AC10C_BODY" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 6a1d931643975e2819abec200f6f362173636da7 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 14:13:38 -0700 Subject: [PATCH 59/97] Prevent RLCR loop orphaning when session dies mid-background Codex review P2 on the background-task short-circuit: allowing the natural stop at the early-exit site returned control to the user while state.md was still bound to the current session_id. If the user closed Claude before the background-task completion notification arrived, find_active_loop in any later session would reject the loop (stored session_id did not match), so the loop was stranded and recovery required manual cancellation. Resolve without touching state.md by introducing a narrow cross-session adoption signal: - Early-exit block now also runs `: > "$LOOP_DIR/bg-pending.marker"` before emitting the systemMessage JSON. Failure is tolerated so the short-circuit never blocks on a flaky filesystem. - find_active_loop's session-filter branch gains a second acceptance step: on a stored-vs-filter session_id mismatch, if the dir has `bg-pending.marker` AND an active state file, it is still accepted. Terminal loops with a stale marker are ignored. Three new regression cases in tests/test-stop-hook-bg-allow.sh: AC-11 session_id mismatch + marker + pending bg -> short-circuit fires; state.md stays byte-identical. AC-11b session_id mismatch + no marker -> hook takes the existing "no active loop" exit-0 path; confirms the marker is the only cross-session adoption signal. AC-11c same-session short-circuit really writes the marker, so AC-11 is grounded in real hook behavior rather than a synthetic setup. 
Validation: - bash tests/test-stop-hook-bg-allow.sh -> 17 passed, 0 failed - bash tests/run-all-tests.sh -> 1696 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 17 passed, 0 failed No systemMessage wording change. No state.md mutation. Branch still targets dev; version stays at 1.16.0. --- hooks/lib/loop-common.sh | 12 +++ hooks/loop-codex-stop-hook.sh | 3 + tests/test-stop-hook-bg-allow.sh | 127 +++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 3b210793..f89c2eed 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -477,6 +477,18 @@ find_active_loop() { echo "" return fi + + # Session mismatch: adopt the loop only when it was explicitly parked + # for a background task (stop hook writes bg-pending.marker there). + if [[ -f "$trimmed_dir/bg-pending.marker" ]]; then + local active_state_bg + active_state_bg=$(resolve_active_state_file "$trimmed_dir") + if [[ -n "$active_state_bg" ]]; then + echo "$trimmed_dir" + return + fi + # Marker on a terminal loop is stale; ignore it and keep walking. + fi done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) echo "" diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 5b57a3cb..ec8444e5 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -87,6 +87,9 @@ fi HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH"; then PENDING_BG_COUNT=$(count_pending_background_tasks "$HOOK_TRANSCRIPT_PATH") + # Mark the loop as parked; allows a fresh session to adopt it if this + # Claude window is closed before the background task finishes. + : > "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true jq -n --arg count "$PENDING_BG_COUNT" \ '{systemMessage: ("RLCR loop active. 
" + $count + " background task(s) still running - stop allowed naturally; loop has NOT terminated and will resume on completion.")}' exit 0 diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 408400a6..b9810246 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -545,5 +545,132 @@ else "exit $AC10C_EXIT; output: $AC10C_BODY" fi +# ---------------- AC-11 / AC-11b ---------------- +# Orphan prevention: when the short-circuit parks a loop waiting for a +# background task and the user closes that Claude session, a fresh +# session must still be able to pick up the loop. The short-circuit +# writes `bg-pending.marker` into the loop dir; find_active_loop +# accepts a stored-vs-filter session_id mismatch iff the marker is +# present. Without this cross-session adoption path, state.md would +# be stranded with the dead session_id and require manual cancel. +echo "Test AC-11: cross-session bg-pending.marker allows pickup" +AC11_REPO="$TEST_DIR/ac11" +AC11_LOOP=$(create_full_fixture "$AC11_REPO") +AC11_STATE="$AC11_LOOP/state.md" + +# Override state.md with an explicit stored session_id so find_active_loop +# sees a real mismatch when we later pass a different session_id. 
+AC11_BRANCH=$(git -C "$AC11_REPO" rev-parse --abbrev-ref HEAD) +AC11_BASE_COMMIT=$(git -C "$AC11_REPO" rev-parse HEAD) +cat > "$AC11_STATE" < "$AC11_LOOP/bg-pending.marker" + +AC11_TRANSCRIPT="$TRANSCRIPTS_DIR/ac11.jsonl" +AC11_LAUNCH=$(emit_tool_use_assistant "toolu_I" "Agent" ',"description":"x","prompt":"x"') +AC11_RESULT=$(emit_async_agent_launch_result "toolu_I" "agent_pending_I") +write_transcript "$AC11_TRANSCRIPT" "$AC11_LAUNCH" "$AC11_RESULT" + +AC11_INPUT=$(jq -c -n --arg tp "$AC11_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_beta"}') +run_stop_hook_with_input "$AC11_REPO" "$AC11_INPUT" +assert_systemmessage_only \ + "AC-11: cross-session bg-pending.marker allows pickup and short-circuit" \ + "$AC11_REPO" "$AC11_STATE" "1 background task" + +# Negative counterpart: same session mismatch but NO marker must still +# reject the loop (preserving the existing session-bound isolation when +# the loop was not explicitly parked). +echo "Test AC-11b: cross-session without marker is still rejected" +AC11B_REPO="$TEST_DIR/ac11b" +AC11B_LOOP=$(create_full_fixture "$AC11B_REPO") +AC11B_STATE="$AC11B_LOOP/state.md" +AC11B_BRANCH=$(git -C "$AC11B_REPO" rev-parse --abbrev-ref HEAD) +AC11B_BASE_COMMIT=$(git -C "$AC11B_REPO" rev-parse HEAD) +cat > "$AC11B_STATE" </dev/null || echo "") +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! -f "$RUN_MARKER" ]] && [[ -z "$AC11B_SYS_MSG" ]]; then + pass "AC-11b: cross-session without marker keeps existing isolation (no adoption)" +else + fail "AC-11b: cross-session without marker keeps existing isolation (no adoption)" \ + "exit 0, no Codex marker, no systemMessage" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$RUN_MARKER" && echo present || echo missing), systemMessage='$AC11B_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# AC-11c: short-circuit should actually write bg-pending.marker so the +# adoption path in AC-11 is reachable from real usage (not only from +# synthetic test setup). 
+echo "Test AC-11c: short-circuit writes bg-pending.marker" +AC11C_REPO="$TEST_DIR/ac11c" +AC11C_LOOP=$(create_full_fixture "$AC11C_REPO") +AC11C_MARKER="$AC11C_LOOP/bg-pending.marker" +[[ -e "$AC11C_MARKER" ]] && rm -f "$AC11C_MARKER" + +AC11C_TRANSCRIPT="$TRANSCRIPTS_DIR/ac11c.jsonl" +AC11C_LAUNCH=$(emit_tool_use_assistant "toolu_K" "Agent" ',"description":"x","prompt":"x"') +AC11C_RESULT=$(emit_async_agent_launch_result "toolu_K" "agent_pending_K") +write_transcript "$AC11C_TRANSCRIPT" "$AC11C_LAUNCH" "$AC11C_RESULT" + +AC11C_INPUT=$(jq -c -n --arg tp "$AC11C_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC11C_REPO" "$AC11C_INPUT" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -f "$AC11C_MARKER" ]]; then + pass "AC-11c: short-circuit path writes bg-pending.marker into loop dir" +else + fail "AC-11c: short-circuit path writes bg-pending.marker into loop dir" \ + "exit 0 and bg-pending.marker present" \ + "exit $RUN_EXIT_CODE, marker=$(test -f "$AC11C_MARKER" && echo present || echo missing); output: $RUN_OUTPUT" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 69587a777fb5feb6c159af3d0d610321e27582ac Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 14:25:45 -0700 Subject: [PATCH 60/97] Prefer own-session match and clean marker on resume Codex review flagged two regressions from the previous round's bg-pending.marker change: [P1] find_active_loop could return the newest loop dir that had a marker before the scan reached an older dir whose stored session_id actually matched the caller. With multiple active RLCR loops in one repo that breaks session isolation and lets a stop hook attach to the wrong conversation. [P2] The short-circuit wrote bg-pending.marker but nothing cleared it when the next stop saw no pending background task. A later stop from a different session_id would keep being adopted through the stale marker long after the bg had resolved. 
Both fixes are in the files already touched by this feature: * find_active_loop's session-filter branch now makes exact stored-vs-filter match win over marker fallback. Marker candidates are recorded while the scan walks newest-to-oldest and only returned after the whole listing fails to yield an exact match. Zombie-loop protection still wins for the caller's own session. This preserves Round 3 orphan recovery while restoring isolation across concurrent sessions. * After has_pending_background_tasks returns false, the stop hook now handles the marker: if HOOK_SESSION_ID differs from the stored session_id in the active state file, it rewrites that line with portable sed -i.bak; then the marker is removed unconditionally. Rewrite failure is logged but non-fatal. Regressions in tests/test-stop-hook-bg-allow.sh: AC-12 find_active_loop returns the older exact-match dir over a newer foreign-session dir that has a marker. AC-12b find_active_loop does not touch a foreign session's marker. AC-13 Same-session resume with a stale marker + empty bg transcript clears the marker. AC-13b Same-session resume leaves state.md session_id unchanged. AC-14 Cross-session resume clears the marker. AC-14b Cross-session resume rewrites state.md session_id to the caller's session. Validation: - bash tests/test-stop-hook-bg-allow.sh -> 23 passed, 0 failed - bash tests/run-all-tests.sh -> 1702 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 23 passed, 0 failed systemMessage wording unchanged. Version stays at 1.16.0. 
--- hooks/lib/loop-common.sh | 44 ++++++--- hooks/loop-codex-stop-hook.sh | 22 +++++ tests/test-stop-hook-bg-allow.sh | 156 +++++++++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 14 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index f89c2eed..62af2c12 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -448,9 +448,18 @@ find_active_loop() { return fi - # Session filter: iterate newest-to-oldest, find the first dir belonging - # to this session (any state file), then check if it is still active. + # Session filter: iterate newest-to-oldest. + # + # The caller's own (exact stored session_id) match takes precedence over + # any marker-based adoption: with multiple active RLCR loops in the same + # repo, a newer dir parked by a different session must not be returned + # before an older dir that actually belongs to the caller. Marker + # candidates are recorded during the scan and only used as a fallback + # when no exact match is found anywhere. Zombie-loop protection + # (terminal newest for this session returns empty) still wins over + # marker fallback. local dir + local marker_candidate="" while IFS= read -r dir; do [[ -z "$dir" ]] && continue local trimmed_dir="${dir%/}" @@ -464,9 +473,9 @@ find_active_loop() { local stored_session_id stored_session_id=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/'"${FIELD_SESSION_ID}"': *//; p; } }' "$any_state" 2>/dev/null | tr -d ' ') - # Empty stored session_id matches any session (backward compat) + # Empty stored session_id matches any session (backward compat). if [[ -z "$stored_session_id" ]] || [[ "$stored_session_id" == "$filter_session_id" ]]; then - # This is the newest dir for this session -- only return if active + # Newest dir for this session -- only return if active. 
local active_state active_state=$(resolve_active_state_file "$trimmed_dir") if [[ -n "$active_state" ]]; then @@ -474,27 +483,34 @@ find_active_loop() { return fi # Session's newest loop is in terminal state; do not fall through + # to marker-based adoption either. echo "" return fi - # Session mismatch: adopt the loop only when it was explicitly parked - # for a background task (stop hook writes bg-pending.marker there). - if [[ -f "$trimmed_dir/bg-pending.marker" ]]; then - local active_state_bg - active_state_bg=$(resolve_active_state_file "$trimmed_dir") - if [[ -n "$active_state_bg" ]]; then - echo "$trimmed_dir" - return + # Session mismatch: stash the newest eligible marker candidate but + # keep walking in case an older dir is the caller's own session. + if [[ -z "$marker_candidate" ]] && [[ -f "$trimmed_dir/bg-pending.marker" ]]; then + local candidate_state + candidate_state=$(resolve_active_state_file "$trimmed_dir") + if [[ -n "$candidate_state" ]]; then + marker_candidate="$trimmed_dir" fi - # Marker on a terminal loop is stale; ignore it and keep walking. + # Marker on a terminal loop is stale; leave it alone. fi done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) + # No exact session match. Fall back to marker-based adoption if any -- + # this is the cross-session recovery path when a previous session parked + # the loop and then died before the background-task completion arrived. + if [[ -n "$marker_candidate" ]]; then + echo "$marker_candidate" + return + fi + echo "" } - # Extract current round number from state.md # Outputs the round number to stdout, defaults to 0 # Note: For full state parsing, use parse_state_file() instead diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index ec8444e5..d018e598 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -95,6 +95,28 @@ if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH"; then exit 0 fi +# No pending background task. 
If a stale bg-pending.marker is lingering +# here, this stop is the resume point. When find_active_loop picked this +# dir up through the marker-fallback path (stored session_id differs from +# the current one), rewrite the stored session_id so future same-session +# stops use the exact-match path, then remove the marker so any later +# hook trigger from an unrelated session is rejected rather than adopted. +if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then + ADOPT_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") + if [[ -n "$ADOPT_STATE_FILE" ]] && [[ -n "$HOOK_SESSION_ID" ]]; then + STORED_SID_ADOPT=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$ADOPT_STATE_FILE" 2>/dev/null | tr -d ' ') + if [[ -n "$STORED_SID_ADOPT" ]] && [[ "$STORED_SID_ADOPT" != "$HOOK_SESSION_ID" ]]; then + # Portable in-place rewrite. Failure is logged but non-fatal: + # worst case the next stop re-adopts via the marker pathway. + if ! sed -i.bak -E "s|^(${FIELD_SESSION_ID}:).*$|\\1 $HOOK_SESSION_ID|" "$ADOPT_STATE_FILE" 2>/dev/null; then + echo "Warning: failed to adopt session_id in $ADOPT_STATE_FILE" >&2 + fi + rm -f "${ADOPT_STATE_FILE}.bak" 2>/dev/null || true + fi + fi + rm -f "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true +fi + # ======================================== # Detect Loop Phase: Normal or Finalize # ======================================== diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index b9810246..3dda7356 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -672,5 +672,161 @@ else "exit $RUN_EXIT_CODE, marker=$(test -f "$AC11C_MARKER" && echo present || echo missing); output: $RUN_OUTPUT" fi +# ---------------- AC-12 ---------------- +# Session isolation under multiple concurrent RLCR loops: when the caller's +# own exact-match dir exists in the listing, find_active_loop must return +# it even if a newer sibling dir (belonging to another 
session) also has a +# bg-pending.marker. The marker fallback is only for orphan recovery when +# no exact match exists. +echo "Test AC-12: find_active_loop prefers exact session match over marker" +AC12_BASE="$TEST_DIR/ac12-loops" +mkdir -p "$AC12_BASE/2026-03-02_00-00-00" +mkdir -p "$AC12_BASE/2026-03-01_00-00-00" + +cat > "$AC12_BASE/2026-03-02_00-00-00/state.md" <<'EOF_AC12_NEWER' +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.4 +codex_effort: high +session_id: session_foreign +--- +EOF_AC12_NEWER +: > "$AC12_BASE/2026-03-02_00-00-00/bg-pending.marker" + +cat > "$AC12_BASE/2026-03-01_00-00-00/state.md" <<'EOF_AC12_OLDER' +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.4 +codex_effort: high +session_id: session_home +--- +EOF_AC12_OLDER + +AC12_RESULT=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + find_active_loop "$AC12_BASE" "session_home" +) +if [[ "$AC12_RESULT" == "$AC12_BASE/2026-03-01_00-00-00" ]]; then + pass "AC-12: find_active_loop returns older exact-match dir over newer marker dir" +else + fail "AC-12: find_active_loop returns older exact-match dir over newer marker dir" \ + "$AC12_BASE/2026-03-01_00-00-00" "$AC12_RESULT" +fi + +if [[ -f "$AC12_BASE/2026-03-02_00-00-00/bg-pending.marker" ]]; then + pass "AC-12b: foreign session's marker untouched by find_active_loop scan" +else + fail "AC-12b: foreign session's marker untouched by find_active_loop scan" \ + "newer dir marker still present" "marker was removed" +fi + +# ---------------- AC-13 ---------------- +# Same-session resume after background completion: a stale marker from the +# previous short-circuit must be cleaned up on the next stop where no bg is +# pending. State.md session_id stays put because it already matches. 
+echo "Test AC-13: same-session resume removes stale bg-pending.marker" +AC13_REPO="$TEST_DIR/ac13" +AC13_LOOP=$(create_full_fixture "$AC13_REPO") +AC13_STATE="$AC13_LOOP/state.md" +AC13_BRANCH=$(git -C "$AC13_REPO" rev-parse --abbrev-ref HEAD) +AC13_BASE_COMMIT=$(git -C "$AC13_REPO" rev-parse HEAD) +cat > "$AC13_STATE" < "$AC13_LOOP/bg-pending.marker" + +AC13_TRANSCRIPT="$TRANSCRIPTS_DIR/ac13.jsonl" +write_transcript "$AC13_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' +AC13_INPUT=$(jq -c -n --arg tp "$AC13_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC13_REPO" "$AC13_INPUT" + +if [[ ! -f "$AC13_LOOP/bg-pending.marker" ]]; then + pass "AC-13: marker removed on non-short-circuit resume (same session)" +else + fail "AC-13: marker removed on non-short-circuit resume (same session)" \ + "marker absent" "marker still present" +fi + +if grep -q "^session_id: session_home$" "$AC13_STATE"; then + pass "AC-13b: same-session resume leaves state.md session_id unchanged" +else + fail "AC-13b: same-session resume leaves state.md session_id unchanged" \ + "session_id: session_home" "$(grep '^session_id:' "$AC13_STATE" || echo '(missing)')" +fi + +# ---------------- AC-14 ---------------- +# Cross-session resume: a different session walks in, finds the loop through +# the marker fallback, and the non-short-circuit path must rewrite the +# stored session_id so future same-session stops use the exact-match path +# instead of re-adopting via a stale marker. 
+echo "Test AC-14: cross-session resume rewrites session_id and removes marker" +AC14_REPO="$TEST_DIR/ac14" +AC14_LOOP=$(create_full_fixture "$AC14_REPO") +AC14_STATE="$AC14_LOOP/state.md" +AC14_BRANCH=$(git -C "$AC14_REPO" rev-parse --abbrev-ref HEAD) +AC14_BASE_COMMIT=$(git -C "$AC14_REPO" rev-parse HEAD) +cat > "$AC14_STATE" < "$AC14_LOOP/bg-pending.marker" + +AC14_TRANSCRIPT="$TRANSCRIPTS_DIR/ac14.jsonl" +write_transcript "$AC14_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' +AC14_INPUT=$(jq -c -n --arg tp "$AC14_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC14_REPO" "$AC14_INPUT" + +if [[ ! -f "$AC14_LOOP/bg-pending.marker" ]]; then + pass "AC-14: marker removed after cross-session resume" +else + fail "AC-14: marker removed after cross-session resume" \ + "marker absent" "marker still present" +fi + +if grep -q "^session_id: session_home$" "$AC14_STATE"; then + pass "AC-14b: state.md session_id rewritten to the current session on adoption" +else + fail "AC-14b: state.md session_id rewritten to the current session on adoption" \ + "session_id: session_home" "$(grep '^session_id:' "$AC14_STATE" || echo '(missing)')" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 05359199a442d3db994a3f4bcb9783190ba7c515 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 14:40:49 -0700 Subject: [PATCH 61/97] Recognise SDK task_notification completions and keep recovery marker Two Codex review findings on the previous round: [P1] The completion parser in list_pending_background_task_ids only understood the legacy queue-operation XML blocks. Current Claude Code transcripts emit background-task completions as SDKTaskNotificationMessage records: type: "system", subtype: "task_notification", task_id: ... Without SDK-format recognition, launched ids stayed pending forever and the short-circuit would fire on every stop. 
[P2] The non-short-circuit path cleared bg-pending.marker unconditionally whenever has_pending_background_tasks returned false. That helper is fail-closed, so it also returned false when transcript_path was missing or unreadable (e.g. rlcr-stop-gate.sh without --transcript-path). In that case the cleanup still deleted the marker and rewrote the stored session_id, breaking cross-session recovery exactly where transcript inspection is unavailable. Fixes, confined to the files already touched by this feature: * loop-common.sh: union the completion set from both SDK (type:system subtype:task_notification -> .task_id) and legacy (queue-operation XML) sources. Wrap the legacy branch's grep in `{ grep -oE ... || true; }` so its "no match -> exit 1" cannot combine with set -o pipefail to invalidate the SDK side of the union through the `|| completed=""` fallback. * loop-codex-stop-hook.sh: gate the non-short-circuit marker cleanup on `HOOK_TRANSCRIPT_PATH` being a readable regular file. When transcript inspection is unavailable, leave the marker AND the stored session_id untouched so cross-session recovery stays reachable. Regressions added in tests/test-stop-hook-bg-allow.sh: AC-15 helper treats an SDK task_notification as terminal. AC-16 helper unions SDK + legacy completion formats. AC-17 missing transcript_path key -> marker preserved. AC-17b same scenario -> stored session_id preserved. AC-17c transcript_path pointing at a non-existent file -> same guarantees. A small emit_sdk_task_notification helper was added to keep AC-15 and AC-16 declarative. Validation: - bash tests/test-stop-hook-bg-allow.sh -> 28 passed, 0 failed - bash tests/run-all-tests.sh -> 1707 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 28 passed, 0 failed systemMessage wording unchanged. Version stays at 1.16.0. 
--- hooks/lib/loop-common.sh | 47 ++++++--- hooks/loop-codex-stop-hook.sh | 11 ++- tests/test-stop-hook-bg-allow.sh | 157 +++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 14 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 62af2c12..54944567 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -276,11 +276,17 @@ extract_transcript_path() { # - Background shell: toolUseResult.backgroundTaskId non-empty # -> id is toolUseResult.backgroundTaskId # -# Completion events (inspected in "queue-operation" messages with -# operation == "enqueue" whose content contains a -# <task-notification> XML block): any <task-id>...</task-id> value is treated as terminal -# regardless of the reported <status> (completed, failed, killed, -# cancelled, interrupted, ...). +# Completion events are recognised from two Claude Code transcript forms: +# +# 1. Structured SDK record +# (see SDKTaskNotificationMessage in docs/typescript.md): +# `type == "system"`, `subtype == "task_notification"`, +# `task_id` is the completed id. Any `status` value +# (completed, failed, stopped, ...) is treated as terminal. +# +# 2. Legacy queue-operation enqueue whose `content` embeds a +# `<task-notification>` XML block with `<task-id>...</task-id>`; +# kept for transcripts produced by older Claude Code versions. # # pending := launched \ completed # @@ -315,14 +321,29 @@ list_pending_background_task_ids() { | (.toolUseResult.agentId // .toolUseResult.backgroundTaskId) ' "$transcript_path" 2>/dev/null | sort -u) || return 1 - completed=$(jq -r ' - select(.type == "queue-operation" and .operation == "enqueue") - | (.content // "" | tostring) - | select(contains("")) - ' "$transcript_path" 2>/dev/null \ - | grep -oE '[^<]+' \ - | sed -E 's|||g' \ - | sort -u) || completed="" + # Union of both completion formats. Either source alone is enough to + # mark a launched id terminal. 
+ # + # The `grep -oE || true` guard on the legacy branch keeps `set -o + # pipefail` from poisoning the combined pipeline when no legacy + # queue-operation records exist in the transcript (grep with `-o` + # exits 1 on no matches, which would otherwise wipe out any SDK + # task_notification results collected above). + completed=$( + { + jq -r ' + select(.type == "system" and .subtype == "task_notification") + | (.task_id // empty) + ' "$transcript_path" 2>/dev/null + jq -r ' + select(.type == "queue-operation" and .operation == "enqueue") + | (.content // "" | tostring) + | select(contains("")) + ' "$transcript_path" 2>/dev/null \ + | { grep -oE '[^<]+' || true; } \ + | sed -E 's|||g' + } | sort -u | sed '/^$/d' + ) || completed="" # Emit launched ids that have no matching completion notification. comm -23 \ diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index d018e598..d52fadbf 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -101,7 +101,16 @@ fi # the current one), rewrite the stored session_id so future same-session # stops use the exact-match path, then remove the marker so any later # hook trigger from an unrelated session is rejected rather than adopted. -if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then +# +# Guard: only perform the cleanup when we could actually inspect the +# transcript. `has_pending_background_tasks` is fail-closed and also +# returns false when the transcript is missing or unreadable (e.g. +# rlcr-stop-gate.sh invoked without --transcript-path). In that case the +# "no pending" signal is not authoritative, so the marker and the stored +# session_id must be preserved to keep cross-session recovery reachable. 
+if [[ -f "$LOOP_DIR/bg-pending.marker" ]] \ + && [[ -n "$HOOK_TRANSCRIPT_PATH" ]] \ + && [[ -f "$HOOK_TRANSCRIPT_PATH" ]]; then ADOPT_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") if [[ -n "$ADOPT_STATE_FILE" ]] && [[ -n "$HOOK_SESSION_ID" ]]; then STORED_SID_ADOPT=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$ADOPT_STATE_FILE" 2>/dev/null | tr -d ' ') diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 3dda7356..71ffb9e0 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -215,6 +215,12 @@ emit_task_completion_event() { '{type:"queue-operation", operation:"enqueue", content:$content}' } +emit_sdk_task_notification() { + local task_id="$1" tool_use_id="$2" status="${3:-completed}" + jq -c -n --arg tid "$task_id" --arg tu "$tool_use_id" --arg st "$status" \ + '{type:"system", subtype:"task_notification", task_id:$tid, tool_use_id:$tu, status:$st}' +} + write_transcript() { local path="$1" shift @@ -828,5 +834,156 @@ else "session_id: session_home" "$(grep '^session_id:' "$AC14_STATE" || echo '(missing)')" fi +# ---------------- AC-15 ---------------- +# Completion recognition: the current Claude Code transcript format emits +# background-task completion as +# type: "system", subtype: "task_notification", task_id: "..." +# The helper must recognise this form (not only the legacy queue-operation +# XML block) or launched tasks will stay "pending" forever. 
+echo "Test AC-15: task_notification system records mark launches completed" +AC15_TRANSCRIPT="$TRANSCRIPTS_DIR/ac15.jsonl" +AC15_LAUNCH=$(emit_tool_use_assistant "toolu_L" "Agent" ',"description":"x","prompt":"x"') +AC15_RESULT=$(emit_async_agent_launch_result "toolu_L" "agent_done_L") +AC15_NOTIF=$(emit_sdk_task_notification "agent_done_L" "toolu_L" "completed") +write_transcript "$AC15_TRANSCRIPT" "$AC15_LAUNCH" "$AC15_RESULT" "$AC15_NOTIF" + +AC15_PENDING=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC15_TRANSCRIPT" 2>/dev/null +) +if [[ -z "$AC15_PENDING" ]]; then + pass "AC-15: task_notification completion removes the matching launch from pending" +else + fail "AC-15: task_notification completion removes the matching launch from pending" \ + "empty pending list" "got: $AC15_PENDING" +fi + +# ---------------- AC-16 ---------------- +# Completion recognition mixed formats: two launches, one completed via the +# legacy queue-operation XML block, the other via the current +# system/task_notification record. Union of both sources must resolve to +# an empty pending set. 
+echo "Test AC-16: helper unions legacy queue-operation and task_notification completions" +AC16_TRANSCRIPT="$TRANSCRIPTS_DIR/ac16.jsonl" +AC16_L1=$(emit_tool_use_assistant "toolu_M1" "Agent" ',"description":"x","prompt":"x"') +AC16_R1=$(emit_async_agent_launch_result "toolu_M1" "agent_legacy_M1") +AC16_C1=$(emit_task_completion_event "agent_legacy_M1" "toolu_M1" "completed") +AC16_L2=$(emit_tool_use_assistant "toolu_M2" "Agent" ',"description":"y","prompt":"y"') +AC16_R2=$(emit_async_agent_launch_result "toolu_M2" "agent_sdk_M2") +AC16_C2=$(emit_sdk_task_notification "agent_sdk_M2" "toolu_M2" "completed") +write_transcript "$AC16_TRANSCRIPT" \ + "$AC16_L1" "$AC16_R1" "$AC16_C1" \ + "$AC16_L2" "$AC16_R2" "$AC16_C2" + +AC16_PENDING=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC16_TRANSCRIPT" 2>/dev/null +) +if [[ -z "$AC16_PENDING" ]]; then + pass "AC-16: mixed legacy+SDK completion records resolve to empty pending set" +else + fail "AC-16: mixed legacy+SDK completion records resolve to empty pending set" \ + "empty pending list" "got: $AC16_PENDING" +fi + +# ---------------- AC-17 ---------------- +# Marker preservation when completion cannot be verified: if +# transcript_path is missing or unreadable, has_pending_background_tasks +# fails closed (returns no pending). The non-short-circuit cleanup must NOT +# erase bg-pending.marker or rewrite session_id in that case, because the +# cross-session recovery signal is still needed. 
+echo "Test AC-17: missing transcript preserves bg-pending.marker and session_id" +AC17_REPO="$TEST_DIR/ac17" +AC17_LOOP=$(create_full_fixture "$AC17_REPO") +AC17_STATE="$AC17_LOOP/state.md" +AC17_BRANCH=$(git -C "$AC17_REPO" rev-parse --abbrev-ref HEAD) +AC17_BASE_COMMIT=$(git -C "$AC17_REPO" rev-parse HEAD) +cat > "$AC17_STATE" < "$AC17_LOOP/bg-pending.marker" + +# Hook input has NO transcript_path -> has_pending_background_tasks is +# fail-closed; cleanup path must leave marker and session_id intact. +AC17_INPUT='{"session_id":"session_home"}' +run_stop_hook_with_input "$AC17_REPO" "$AC17_INPUT" + +if [[ -f "$AC17_LOOP/bg-pending.marker" ]]; then + pass "AC-17: unreadable transcript preserves bg-pending.marker" +else + fail "AC-17: unreadable transcript preserves bg-pending.marker" \ + "marker still present" "marker was removed" +fi + +if grep -q "^session_id: session_foreign$" "$AC17_STATE"; then + pass "AC-17b: unreadable transcript leaves stored session_id untouched" +else + fail "AC-17b: unreadable transcript leaves stored session_id untouched" \ + "session_id: session_foreign" "$(grep '^session_id:' "$AC17_STATE" || echo '(missing)')" +fi + +# AC-17c: transcript_path is provided but points at a non-existent file +# (equally unreadable). Same guarantee: marker + stored session_id +# preserved. 
+echo "Test AC-17c: transcript_path pointing at non-existent file preserves marker" +AC17C_REPO="$TEST_DIR/ac17c" +AC17C_LOOP=$(create_full_fixture "$AC17C_REPO") +AC17C_STATE="$AC17C_LOOP/state.md" +AC17C_BRANCH=$(git -C "$AC17C_REPO" rev-parse --abbrev-ref HEAD) +AC17C_BASE_COMMIT=$(git -C "$AC17C_REPO" rev-parse HEAD) +cat > "$AC17C_STATE" < "$AC17C_LOOP/bg-pending.marker" + +AC17C_INPUT=$(jq -c -n --arg tp "$TRANSCRIPTS_DIR/never-written.jsonl" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC17C_REPO" "$AC17C_INPUT" + +if [[ -f "$AC17C_LOOP/bg-pending.marker" ]] \ + && grep -q "^session_id: session_foreign$" "$AC17C_STATE"; then + pass "AC-17c: missing-file transcript_path preserves marker and session_id" +else + fail "AC-17c: missing-file transcript_path preserves marker and session_id" \ + "marker present and session_id: session_foreign" \ + "marker=$(test -f "$AC17C_LOOP/bg-pending.marker" && echo present || echo missing); session_id=$(grep '^session_id:' "$AC17C_STATE" || echo '(missing)')" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 60e263504442255fcc4edf2ca89a2f805e8e77c1 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 14:56:17 -0700 Subject: [PATCH 62/97] Narrow cross-session adoption and block foreign-session hijack Three blocking findings from the previous review: [P1] The Round 5 non-short-circuit cleanup hijacked foreign parked loops: session B could rewrite a parked loop's stored session_id to B and delete the marker using B's own transcript, even though A's background task was still running. A was locked out of its own loop. [P2] Every caller of find_active_loop inherited the marker fallback, including loop-read-validator, loop-write- validator, loop-bash-validator, and loop-plan-file- validator. 
An unrelated session's validators started enforcing foreign parked-loop gates (notably the methodology-analysis phase) on ordinary writes and bash commands, breaking existing session isolation guarantees. [P2] The Round 4 sed -i.bak session_id rewrite injected HOOK_SESSION_ID into the replacement text unescaped. Session IDs containing `&` are valid per this repo's test-session-id.sh; adopting such an id corrupted state.md. Fixes, confined to files already touched by this feature: * loop-common.sh: find_active_loop gains a third positional parameter allow_bg_marker_fallback (default false). Both the inner marker_candidate record and the post-loop fallback return are gated on it. Only the stop hook opts in; validators keep the pre-Round-3 strict isolation. * loop-codex-stop-hook.sh: - Call find_active_loop with `true` to opt in. - Add a new "Cross-Session Parked-Loop Guard" block before the short-circuit: when bg-pending.marker is present AND the stored session_id differs from HOOK_SESSION_ID, emit a dedicated "parked by another Claude session" systemMessage and exit 0 without touching marker, state.md, or session_id. B's hook can never advance A's parked loop on B's transcript. - Simplify the non-short-circuit cleanup: the guard ensures only same-session cases reach here, so the cleanup is now just `rm -f bg-pending.marker` (still gated on transcript readability). The sed session_id rewrite is removed entirely, which also removes the unescaped-metacharacter issue. Regressions in tests/test-stop-hook-bg-allow.sh: AC-11 (rewritten) cross-session + marker -> "parked" systemMessage + marker preserved + state.md byte-identical. AC-14 (rewritten) anti-hijack: cross-session stop preserves bg-pending.marker. AC-14b (rewritten) cross-session stop leaves stored session_id intact. AC-18 NEW. find_active_loop default (no opt-in) ignores a foreign marker dir -> validators stay isolated. AC-18b NEW. 
find_active_loop with opt-in does return the marker dir (confirms the flag is wired). All other existing regressions continue to pass. Validation: - bash tests/test-stop-hook-bg-allow.sh -> 30 passed, 0 failed - bash tests/run-all-tests.sh -> 1709 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 30 passed, 0 failed Original short-circuit systemMessage wording unchanged. Version stays at 1.16.0. --- hooks/lib/loop-common.sh | 26 +++++-- hooks/loop-codex-stop-hook.sh | 67 ++++++++++-------- tests/test-stop-hook-bg-allow.sh | 114 +++++++++++++++++++++++-------- 3 files changed, 143 insertions(+), 64 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 54944567..b8037614 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -442,10 +442,17 @@ resolve_any_state_file() { # Empty stored session_id matches any filter (backward compat for pre-session # state files). # +# Third parameter `allow_bg_marker_fallback` (default "false"): when "true", +# the session-filter branch also considers a mismatched-session dir that holds +# a `bg-pending.marker` file AND an active state file. Only the RLCR stop +# hook opts in to this; every other caller (read/write/bash/plan-file +# validators, ...) keeps strict session isolation. +# # Outputs the directory path to stdout, or empty string if none found find_active_loop() { local loop_base_dir="$1" local filter_session_id="${2:-}" + local allow_bg_marker_fallback="${3:-false}" if [[ ! -d "$loop_base_dir" ]]; then echo "" @@ -509,9 +516,13 @@ find_active_loop() { return fi - # Session mismatch: stash the newest eligible marker candidate but - # keep walking in case an older dir is the caller's own session. - if [[ -z "$marker_candidate" ]] && [[ -f "$trimmed_dir/bg-pending.marker" ]]; then + # Session mismatch. 
Only the stop hook opts in to marker-based + # adoption; validators and other callers keep strict isolation, so + # the candidate is only recorded when the caller explicitly allows + # it. + if [[ "$allow_bg_marker_fallback" == "true" ]] \ + && [[ -z "$marker_candidate" ]] \ + && [[ -f "$trimmed_dir/bg-pending.marker" ]]; then local candidate_state candidate_state=$(resolve_active_state_file "$trimmed_dir") if [[ -n "$candidate_state" ]]; then @@ -521,10 +532,11 @@ find_active_loop() { fi done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) - # No exact session match. Fall back to marker-based adoption if any -- - # this is the cross-session recovery path when a previous session parked - # the loop and then died before the background-task completion arrived. - if [[ -n "$marker_candidate" ]]; then + # No exact session match. Fall back to marker-based adoption only when + # the caller explicitly opted in -- the stop hook uses this to surface + # a "parked by another session" notice or to resume its own parked + # loop after a previous session died before the bg completion arrived. 
+ if [[ "$allow_bg_marker_fallback" == "true" ]] && [[ -n "$marker_candidate" ]]; then echo "$marker_candidate" return fi diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index d52fadbf..58bf6c1e 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -61,13 +61,37 @@ GIT_TIMEOUT=30 # Extract session_id from hook input for session-aware loop filtering HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") -LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") +LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID" true) # If no active loop (or session_id mismatch), allow exit if [[ -z "$LOOP_DIR" ]]; then exit 0 fi +# ======================================== +# Cross-Session Parked-Loop Guard +# ======================================== +# If find_active_loop handed this dir over via the marker fallback, the +# loop is parked by a different session waiting on a background task. The +# current session has no authority to inspect or advance that loop - its +# transcript sees none of the foreign bg activity - so the only safe +# response is to exit 0 with a distinct systemMessage and leave every +# on-disk artifact (state file, stored session_id, marker) untouched. +HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") +if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then + GUARD_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") + if [[ -n "$GUARD_STATE_FILE" ]]; then + GUARD_STORED_SID=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$GUARD_STATE_FILE" 2>/dev/null | tr -d ' ') + if [[ -n "$GUARD_STORED_SID" ]] \ + && [[ -n "$HOOK_SESSION_ID" ]] \ + && [[ "$GUARD_STORED_SID" != "$HOOK_SESSION_ID" ]]; then + jq -n \ + '{systemMessage: "RLCR loop in this repo is parked by another Claude session waiting for background work. Stop allowed; your session leaves the loop untouched. 
If that session ended, run /humanize:cancel-rlcr-loop to clean up."}' + exit 0 + fi + fi +fi + # ======================================== # Early Exit: Pending Background Tasks # ======================================== @@ -84,45 +108,32 @@ fi # # This check MUST run before any other gate (phase detection, state parsing, # branch / plan / git-clean / summary / max-iter checks, Codex review). -HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH"; then PENDING_BG_COUNT=$(count_pending_background_tasks "$HOOK_TRANSCRIPT_PATH") - # Mark the loop as parked; allows a fresh session to adopt it if this - # Claude window is closed before the background task finishes. + # Mark the loop as parked; allows the same session to resume later and + # makes the cross-session guard above reachable if the user opens a + # different Claude session in this repo before the bg task completes. : > "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true jq -n --arg count "$PENDING_BG_COUNT" \ '{systemMessage: ("RLCR loop active. " + $count + " background task(s) still running - stop allowed naturally; loop has NOT terminated and will resume on completion.")}' exit 0 fi -# No pending background task. If a stale bg-pending.marker is lingering -# here, this stop is the resume point. When find_active_loop picked this -# dir up through the marker-fallback path (stored session_id differs from -# the current one), rewrite the stored session_id so future same-session -# stops use the exact-match path, then remove the marker so any later -# hook trigger from an unrelated session is rejected rather than adopted. +# Same-session resume after background task finished: the cross-session +# guard above already exited for every foreign session, so reaching here +# with the marker present means the CURRENT session parked the loop and +# has now come back with a transcript showing no pending bg events. 
+# Remove the stale marker before the normal flow takes over. # -# Guard: only perform the cleanup when we could actually inspect the -# transcript. `has_pending_background_tasks` is fail-closed and also -# returns false when the transcript is missing or unreadable (e.g. -# rlcr-stop-gate.sh invoked without --transcript-path). In that case the -# "no pending" signal is not authoritative, so the marker and the stored -# session_id must be preserved to keep cross-session recovery reachable. +# Guard: only run when we could actually inspect the transcript. +# `has_pending_background_tasks` is fail-closed and also returns false +# when the transcript is missing or unreadable (e.g. rlcr-stop-gate.sh +# invoked without --transcript-path). In that case the "no pending" +# signal is not authoritative, so the marker stays in place to keep +# cross-session recovery reachable. if [[ -f "$LOOP_DIR/bg-pending.marker" ]] \ && [[ -n "$HOOK_TRANSCRIPT_PATH" ]] \ && [[ -f "$HOOK_TRANSCRIPT_PATH" ]]; then - ADOPT_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") - if [[ -n "$ADOPT_STATE_FILE" ]] && [[ -n "$HOOK_SESSION_ID" ]]; then - STORED_SID_ADOPT=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$ADOPT_STATE_FILE" 2>/dev/null | tr -d ' ') - if [[ -n "$STORED_SID_ADOPT" ]] && [[ "$STORED_SID_ADOPT" != "$HOOK_SESSION_ID" ]]; then - # Portable in-place rewrite. Failure is logged but non-fatal: - # worst case the next stop re-adopts via the marker pathway. - if ! 
sed -i.bak -E "s|^(${FIELD_SESSION_ID}:).*$|\\1 $HOOK_SESSION_ID|" "$ADOPT_STATE_FILE" 2>/dev/null; then - echo "Warning: failed to adopt session_id in $ADOPT_STATE_FILE" >&2 - fi - rm -f "${ADOPT_STATE_FILE}.bak" 2>/dev/null || true - fi - fi rm -f "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true fi diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 71ffb9e0..b42b9423 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -552,17 +552,17 @@ else fi # ---------------- AC-11 / AC-11b ---------------- -# Orphan prevention: when the short-circuit parks a loop waiting for a -# background task and the user closes that Claude session, a fresh -# session must still be able to pick up the loop. The short-circuit -# writes `bg-pending.marker` into the loop dir; find_active_loop -# accepts a stored-vs-filter session_id mismatch iff the marker is -# present. Without this cross-session adoption path, state.md would -# be stranded with the dead session_id and require manual cancel. -echo "Test AC-11: cross-session bg-pending.marker allows pickup" +# Cross-session parked-loop guard: when a loop in the repo carries the +# bg-pending.marker and its stored session_id does not match the caller, +# the stop hook must exit 0 with a dedicated "parked by another session" +# systemMessage and leave every on-disk artifact intact. The current +# session has no authority to advance or clean up a foreign parked loop +# because its transcript cannot observe the other session's bg task. +echo "Test AC-11: cross-session bg-pending.marker emits 'parked' systemMessage" AC11_REPO="$TEST_DIR/ac11" AC11_LOOP=$(create_full_fixture "$AC11_REPO") AC11_STATE="$AC11_LOOP/state.md" +AC11_MARKER="$AC11_LOOP/bg-pending.marker" # Override state.md with an explicit stored session_id so find_active_loop # sees a real mismatch when we later pass a different session_id. 
@@ -588,11 +588,10 @@ agent_teams: false session_id: session_alpha --- EOF_AC11 +AC11_STATE_HASH_BEFORE=$(sha256sum "$AC11_STATE" | awk '{print $1}') -# Simulate the state left by a previous session that took the short-circuit -# and then died (Claude window closed). The marker is the public contract -# between the short-circuit path and cross-session pickup. -: > "$AC11_LOOP/bg-pending.marker" +# Simulate the state left by a previous session that took the short-circuit. +: > "$AC11_MARKER" AC11_TRANSCRIPT="$TRANSCRIPTS_DIR/ac11.jsonl" AC11_LAUNCH=$(emit_tool_use_assistant "toolu_I" "Agent" ',"description":"x","prompt":"x"') @@ -602,9 +601,19 @@ write_transcript "$AC11_TRANSCRIPT" "$AC11_LAUNCH" "$AC11_RESULT" AC11_INPUT=$(jq -c -n --arg tp "$AC11_TRANSCRIPT" \ '{transcript_path:$tp, session_id:"session_beta"}') run_stop_hook_with_input "$AC11_REPO" "$AC11_INPUT" -assert_systemmessage_only \ - "AC-11: cross-session bg-pending.marker allows pickup and short-circuit" \ - "$AC11_REPO" "$AC11_STATE" "1 background task" +AC11_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +AC11_STATE_HASH_AFTER=$(sha256sum "$AC11_STATE" | awk '{print $1}') +if [[ "$RUN_EXIT_CODE" -eq 0 ]] \ + && [[ ! 
-f "$RUN_MARKER" ]] \ + && [[ -f "$AC11_MARKER" ]] \ + && [[ "$AC11_STATE_HASH_BEFORE" == "$AC11_STATE_HASH_AFTER" ]] \ + && printf '%s' "$AC11_SYS_MSG" | grep -qi "parked"; then + pass "AC-11: cross-session stop exits with 'parked' systemMessage; marker and session_id untouched" +else + fail "AC-11: cross-session stop exits with 'parked' systemMessage; marker and session_id untouched" \ + "exit 0 + systemMessage matches /parked/ + marker stays + state.md byte-identical + no Codex" \ + "exit $RUN_EXIT_CODE, codex_marker=$(test -f "$RUN_MARKER" && echo present || echo missing), bg_marker=$(test -f "$AC11_MARKER" && echo present || echo missing), state_unchanged=$([[ "$AC11_STATE_HASH_BEFORE" == "$AC11_STATE_HASH_AFTER" ]] && echo yes || echo no), systemMessage='$AC11_SYS_MSG'; output: $RUN_OUTPUT" +fi # Negative counterpart: same session mismatch but NO marker must still # reject the loop (preserving the existing session-bound isolation when @@ -782,14 +791,17 @@ else fi # ---------------- AC-14 ---------------- -# Cross-session resume: a different session walks in, finds the loop through -# the marker fallback, and the non-short-circuit path must rewrite the -# stored session_id so future same-session stops use the exact-match path -# instead of re-adopting via a stale marker. -echo "Test AC-14: cross-session resume rewrites session_id and removes marker" +# Anti-hijack: a different session walking in MUST NOT rewrite the stored +# session_id and MUST NOT delete bg-pending.marker, even when its own +# transcript shows no pending bg events. The foreign session's transcript +# cannot observe the parking session's bg activity, so nothing the new +# session sees is authoritative. The cross-session guard takes over +# instead. 
+echo "Test AC-14: cross-session stop preserves marker and stored session_id" AC14_REPO="$TEST_DIR/ac14" AC14_LOOP=$(create_full_fixture "$AC14_REPO") AC14_STATE="$AC14_LOOP/state.md" +AC14_MARKER="$AC14_LOOP/bg-pending.marker" AC14_BRANCH=$(git -C "$AC14_REPO" rev-parse --abbrev-ref HEAD) AC14_BASE_COMMIT=$(git -C "$AC14_REPO" rev-parse HEAD) cat > "$AC14_STATE" < "$AC14_LOOP/bg-pending.marker" +: > "$AC14_MARKER" AC14_TRANSCRIPT="$TRANSCRIPTS_DIR/ac14.jsonl" write_transcript "$AC14_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' @@ -820,18 +832,18 @@ AC14_INPUT=$(jq -c -n --arg tp "$AC14_TRANSCRIPT" \ '{transcript_path:$tp, session_id:"session_home"}') run_stop_hook_with_input "$AC14_REPO" "$AC14_INPUT" -if [[ ! -f "$AC14_LOOP/bg-pending.marker" ]]; then - pass "AC-14: marker removed after cross-session resume" +if [[ -f "$AC14_MARKER" ]]; then + pass "AC-14: cross-session stop preserves bg-pending.marker" else - fail "AC-14: marker removed after cross-session resume" \ - "marker absent" "marker still present" + fail "AC-14: cross-session stop preserves bg-pending.marker" \ + "marker still present" "marker was removed (foreign-session hijack)" fi -if grep -q "^session_id: session_home$" "$AC14_STATE"; then - pass "AC-14b: state.md session_id rewritten to the current session on adoption" +if grep -q "^session_id: session_foreign$" "$AC14_STATE"; then + pass "AC-14b: cross-session stop leaves stored session_id intact" else - fail "AC-14b: state.md session_id rewritten to the current session on adoption" \ - "session_id: session_home" "$(grep '^session_id:' "$AC14_STATE" || echo '(missing)')" + fail "AC-14b: cross-session stop leaves stored session_id intact" \ + "session_id: session_foreign" "$(grep '^session_id:' "$AC14_STATE" || echo '(missing)')" fi # ---------------- AC-15 ---------------- @@ -985,5 +997,49 @@ else "marker=$(test -f "$AC17C_LOOP/bg-pending.marker" && echo present || echo missing); session_id=$(grep '^session_id:' 
"$AC17C_STATE" || echo '(missing)')" fi +# ---------------- AC-18 ---------------- +# Validator isolation: find_active_loop's marker-based adoption is opt-in +# via its third positional argument. Default callers (read/write/bash/etc. +# validators) must continue to see strict session-id isolation; a parked +# loop for a different session must NOT become visible to them through a +# bg-pending.marker. +echo "Test AC-18: find_active_loop default invocation ignores foreign marker" +AC18_BASE="$TEST_DIR/ac18-loops" +mkdir -p "$AC18_BASE/2026-03-02_00-00-00" +cat > "$AC18_BASE/2026-03-02_00-00-00/state.md" <<'EOF_AC18' +--- +current_round: 0 +max_iterations: 42 +codex_model: gpt-5.4 +codex_effort: high +session_id: session_foreign +--- +EOF_AC18 +: > "$AC18_BASE/2026-03-02_00-00-00/bg-pending.marker" + +AC18_DEFAULT=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + find_active_loop "$AC18_BASE" "session_home" +) +if [[ -z "$AC18_DEFAULT" ]]; then + pass "AC-18: find_active_loop default (no opt-in) ignores foreign marker dir" +else + fail "AC-18: find_active_loop default (no opt-in) ignores foreign marker dir" \ + "empty result (validators stay isolated)" "got: $AC18_DEFAULT" +fi + +AC18_OPTIN=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + find_active_loop "$AC18_BASE" "session_home" true +) +if [[ "$AC18_OPTIN" == "$AC18_BASE/2026-03-02_00-00-00" ]]; then + pass "AC-18b: find_active_loop with opt-in does return the marker dir" +else + fail "AC-18b: find_active_loop with opt-in does return the marker dir" \ + "$AC18_BASE/2026-03-02_00-00-00" "$AC18_OPTIN" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? 
From 7fb303804e33037a786b3561dd1da9c330a804f3 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 15:07:27 -0700 Subject: [PATCH 63/97] Tighten cross-session guard and marker cleanup Two blocking findings from the previous review: [P1] The cross-session parked-loop guard required both a non-empty stored session_id AND a non-empty HOOK_SESSION_ID. Callers that reach the hook without a session_id field -- including scripts/rlcr-stop-gate.sh invoked without --session-id -- bypassed the guard, and a foreign parked loop fell through into the normal path. With any readable transcript, the later cleanup deleted bg-pending.marker and the hook started reviewing a loop it did not own. [P2] Non-short-circuit cleanup only verified that the transcript file existed. list_pending_background_task_ids is fail-closed on malformed or truncated transcripts, yet the cleanup still deleted the marker in that case. The parked-state signal was lost even though background completion was never verified. Fixes, limited to loop-codex-stop-hook.sh: * Cross-session guard: drop the `-n "$HOOK_SESSION_ID"` clause. A non-empty stored session_id that differs from the (possibly empty) hook session_id now triggers the "parked by another Claude session" exit path. Backward-compat semantics are preserved: an empty stored session_id still matches any caller, consistent with find_active_loop's existing rule. * Non-short-circuit cleanup: call list_pending_background_task_ids inline and check its exit code along with its output. The marker is removed only when the helper returned exit 0 AND produced an empty id list. Every fail-closed path (missing file, empty path, jq parse failure, truncation) now leaves the marker intact. No changes to hooks/lib/loop-common.sh; the helper already has the exit-code semantics we rely on. 
Regressions in tests/test-stop-hook-bg-allow.sh: AC-19 Hook input with NO session_id key + state.md session_id populated + bg-pending.marker -> exit 0 with "parked" systemMessage, marker preserved, state.md byte-identical. AC-20 Hook input pointing transcript_path at a deliberately malformed JSONL file + bg-pending.marker -> marker preserved. Validation: - bash tests/test-stop-hook-bg-allow.sh -> 32 passed, 0 failed - bash tests/run-all-tests.sh -> 1711 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 32 passed, 0 failed systemMessage wording unchanged. Version stays at 1.16.0. --- hooks/loop-codex-stop-hook.sh | 33 +++++--- tests/test-stop-hook-bg-allow.sh | 138 +++++++++++++++++++++++++++++-- 2 files changed, 151 insertions(+), 20 deletions(-) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 58bf6c1e..d0d3af30 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -82,8 +82,13 @@ if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then GUARD_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") if [[ -n "$GUARD_STATE_FILE" ]]; then GUARD_STORED_SID=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$GUARD_STATE_FILE" 2>/dev/null | tr -d ' ') + # Non-empty stored session_id that differs from the caller's session + # (empty or not) means this is a foreign parked loop. Hook-input + # schemas that omit session_id -- such as rlcr-stop-gate.sh invoked + # without --session-id -- still get a mismatch here and take the + # safe exit path. An empty stored session_id keeps the existing + # backward-compat "matches any" semantics from find_active_loop. if [[ -n "$GUARD_STORED_SID" ]] \ - && [[ -n "$HOOK_SESSION_ID" ]] \ && [[ "$GUARD_STORED_SID" != "$HOOK_SESSION_ID" ]]; then jq -n \ '{systemMessage: "RLCR loop in this repo is parked by another Claude session waiting for background work. Stop allowed; your session leaves the loop untouched. 
If that session ended, run /humanize:cancel-rlcr-loop to clean up."}' @@ -125,16 +130,22 @@ fi # has now come back with a transcript showing no pending bg events. # Remove the stale marker before the normal flow takes over. # -# Guard: only run when we could actually inspect the transcript. -# `has_pending_background_tasks` is fail-closed and also returns false -# when the transcript is missing or unreadable (e.g. rlcr-stop-gate.sh -# invoked without --transcript-path). In that case the "no pending" -# signal is not authoritative, so the marker stays in place to keep -# cross-session recovery reachable. -if [[ -f "$LOOP_DIR/bg-pending.marker" ]] \ - && [[ -n "$HOOK_TRANSCRIPT_PATH" ]] \ - && [[ -f "$HOOK_TRANSCRIPT_PATH" ]]; then - rm -f "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true +# Two-part guard to make sure we never drop the parked-state signal +# without evidence: +# (a) list_pending_background_task_ids returned exit 0 -- the +# transcript was present, readable, AND parsed successfully. +# The helper is fail-closed on missing files, empty paths, +# jq parse failure, and truncation, so a non-zero exit blocks +# cleanup here even when the transcript "file" exists. +# (b) its output is empty -- proves "no pending" was authoritatively +# verified, not inferred from a failure. +# The check uses a single fresh call so we capture both the exit code +# and the emptiness without double-running jq. 
+if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then + if PENDING_BG_CHECK=$(list_pending_background_task_ids "$HOOK_TRANSCRIPT_PATH" 2>/dev/null) \ + && [[ -z "$PENDING_BG_CHECK" ]]; then + rm -f "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true + fi fi # ======================================== diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index b42b9423..930d6ffc 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -11,15 +11,26 @@ # # Acceptance criteria exercised here (see # .humanize/rlcr/2026-04-16_13-19-26/goal-tracker.md for authoritative list): -# AC-1 no bg dispatches -> normal Codex flow -# AC-2 pending subagent -> exit 0 + systemMessage -# AC-3 pending shell -> exit 0 + systemMessage -# AC-4 subagent launch + complete -> normal Codex flow -# AC-5 2 subagents + 1 shell -> systemMessage mentions "3 background" -# AC-6 missing transcript path -> normal Codex flow (fail-closed) -# AC-7 no active loop -> exit 0, no systemMessage, no Codex -# AC-8 finalize phase pending bg -> exit 0 + systemMessage -# AC-9 via rlcr-stop-gate.sh -> exit 0 (wrapper ALLOW) +# AC-1 no bg dispatches -> normal Codex flow +# AC-2 pending subagent -> exit 0 + systemMessage +# AC-3 pending shell -> exit 0 + systemMessage +# AC-4 subagent launch + complete -> normal Codex flow +# AC-5 2 subagents + 1 shell -> systemMessage mentions "3 background" +# AC-6 missing transcript path -> normal Codex flow (fail-closed) +# AC-7 no active loop -> exit 0, no systemMessage, no Codex +# AC-8 finalize phase pending bg -> exit 0 + systemMessage +# AC-9 via rlcr-stop-gate.sh -> exit 0 (wrapper ALLOW) +# AC-10 tilde transcript path -> short-circuit fires +# AC-11 cross-session bg-pending.marker -> "parked" systemMessage, artifacts intact +# AC-12 find_active_loop prefers exact session -> returns older exact-match dir +# AC-13 same-session resume -> stale marker removed +# AC-14 cross-session stop with marker -> marker and stored 
session_id preserved +# AC-15 task_notification completion format -> marks launch completed +# AC-16 mixed legacy + SDK completions -> resolves to empty pending set +# AC-17 unreadable transcript with marker -> marker and session_id preserved +# AC-18 find_active_loop default ignores marker -> validators stay isolated +# AC-19 hook input omits session_id -> cross-session guard fires +# AC-20 malformed transcript with marker -> marker preserved (fail-closed) # set -euo pipefail @@ -1041,5 +1052,114 @@ else "$AC18_BASE/2026-03-02_00-00-00" "$AC18_OPTIN" fi +# ---------------- AC-19 ---------------- +# Empty-session caller must still be treated as "foreign" for a parked +# loop whose stored session_id is non-empty. Real trigger: callers such +# as scripts/rlcr-stop-gate.sh invoked without --session-id reach the +# hook with no session_id key at all. +echo "Test AC-19: cross-session guard fires when hook input omits session_id" +AC19_REPO="$TEST_DIR/ac19" +AC19_LOOP=$(create_full_fixture "$AC19_REPO") +AC19_STATE="$AC19_LOOP/state.md" +AC19_MARKER="$AC19_LOOP/bg-pending.marker" +AC19_BRANCH=$(git -C "$AC19_REPO" rev-parse --abbrev-ref HEAD) +AC19_BASE_COMMIT=$(git -C "$AC19_REPO" rev-parse HEAD) +cat > "$AC19_STATE" < "$AC19_MARKER" + +# Transcript exists and is a readable, well-formed minimal record. The +# guard must rely on the stored-vs-current session_id mismatch alone, +# not on transcript readability, to detect the foreign-session case. +AC19_TRANSCRIPT="$TRANSCRIPTS_DIR/ac19.jsonl" +write_transcript "$AC19_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' + +# Hook input without any session_id key (mirrors rlcr-stop-gate.sh +# invoked without --session-id). 
+AC19_INPUT=$(jq -c -n --arg tp "$AC19_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC19_REPO" "$AC19_INPUT" +AC19_SYS_MSG=$(printf '%s' "$RUN_OUTPUT" | jq -r '.systemMessage // empty' 2>/dev/null || echo "") +AC19_STATE_HASH_AFTER=$(sha256sum "$AC19_STATE" | awk '{print $1}') +if [[ "$RUN_EXIT_CODE" -eq 0 ]] \ + && [[ ! -f "$RUN_MARKER" ]] \ + && [[ -f "$AC19_MARKER" ]] \ + && [[ "$AC19_STATE_HASH_BEFORE" == "$AC19_STATE_HASH_AFTER" ]] \ + && printf '%s' "$AC19_SYS_MSG" | grep -qi "parked"; then + pass "AC-19: empty hook session_id triggers 'parked' guard; marker and state preserved" +else + fail "AC-19: empty hook session_id triggers 'parked' guard; marker and state preserved" \ + "exit 0 + systemMessage matches /parked/ + marker stays + state.md byte-identical + no Codex" \ + "exit $RUN_EXIT_CODE, codex_marker=$(test -f "$RUN_MARKER" && echo present || echo missing), bg_marker=$(test -f "$AC19_MARKER" && echo present || echo missing), state_unchanged=$([[ "$AC19_STATE_HASH_BEFORE" == "$AC19_STATE_HASH_AFTER" ]] && echo yes || echo no), systemMessage='$AC19_SYS_MSG'; output: $RUN_OUTPUT" +fi + +# ---------------- AC-20 ---------------- +# Non-short-circuit cleanup must not drop bg-pending.marker when the +# transcript exists but cannot be parsed. The helper is fail-closed on +# malformed JSON; that failure must NOT be treated as "no pending". +echo "Test AC-20: malformed transcript preserves bg-pending.marker" +AC20_REPO="$TEST_DIR/ac20" +AC20_LOOP=$(create_full_fixture "$AC20_REPO") +AC20_STATE="$AC20_LOOP/state.md" +AC20_MARKER="$AC20_LOOP/bg-pending.marker" +AC20_BRANCH=$(git -C "$AC20_REPO" rev-parse --abbrev-ref HEAD) +AC20_BASE_COMMIT=$(git -C "$AC20_REPO" rev-parse HEAD) +cat > "$AC20_STATE" < "$AC20_MARKER" + +# Write a deliberately malformed transcript (truncated JSON object) so +# list_pending_background_task_ids's jq invocations fail the parse. 
+AC20_TRANSCRIPT="$TRANSCRIPTS_DIR/ac20.jsonl" +printf '%s\n' '{"type":"user","message":' > "$AC20_TRANSCRIPT" + +AC20_INPUT=$(jq -c -n --arg tp "$AC20_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC20_REPO" "$AC20_INPUT" + +if [[ -f "$AC20_MARKER" ]]; then + pass "AC-20: malformed transcript preserves bg-pending.marker" +else + fail "AC-20: malformed transcript preserves bg-pending.marker" \ + "marker still present (cleanup must not fire on fail-closed helper)" \ + "marker was removed" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 7538f742cd7af4c04391d18d236c39581e4e8a35 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 15:24:19 -0700 Subject: [PATCH 64/97] Silence ambiguous callers and scope transcript scan to this loop Two blocking findings from the previous review: [P1] Round 7 dropped the `-n "$HOOK_SESSION_ID"` check from the cross-session guard to close its own P1. That inverted the failure mode: any wrapper call (e.g. rlcr-stop-gate.sh without --session-id) matched the guard forever once the marker was written, and the RLCR loop never resumed through the wrapper path. [P2] list_pending_background_task_ids scanned the entire session-wide Claude transcript. A long-running background Agent/Bash that started earlier in the same session -- before the current RLCR loop was created -- counted as "pending" for this loop. The short-circuit fired forever for a loop that had no in-scope pending work of its own. Fixes: * loop-codex-stop-hook.sh: - Add an "Ambiguous-Caller Marker Guard" before the cross- session guard. When bg-pending.marker is present AND HOOK_SESSION_ID is empty, exit 0 silently (no systemMessage, no on-disk mutation). The real Claude stop hook always has session_id populated and remains the only authoritative driver for parking and cleanup. - Restore `[[ -n "$HOOK_SESSION_ID" ]]` inside the cross- session guard. 
That branch now fires only when both session ids are non-empty and different. - Compute LOOP_START_TS via derive_loop_start_iso_ts once and pass it through every pending-tasks helper call. * loop-common.sh: - New derive_loop_start_iso_ts helper: parses the loop dir basename YYYY-MM-DD_HH-MM-SS and emits YYYY-MM-DDTHH:MM:SS.000Z for lexical comparison against transcript timestamps. - list_pending_background_task_ids gains an optional since_ts argument. Launch events are filtered by ($since_ts == "" or (.timestamp // "") == "" or (.timestamp // "") >= $since_ts). Empty since_ts preserves old scan-everything behavior; events without .timestamp remain included for fixture / older record compatibility. - has_pending_background_tasks and count_pending_background_tasks pass since_ts through unchanged. Regressions in tests/test-stop-hook-bg-allow.sh: AC-10c refixtured to avoid the AC-10 marker leaking into the wrapper ambiguous-caller branch. AC-19 rewritten: empty HOOK_SESSION_ID + marker -> silent ALLOW, marker and state preserved (inverts Round 7's "parked" expectation). AC-21 helper filters a pre-loop launch and keeps an in-loop launch. AC-21b derive_loop_start_iso_ts produces the expected ISO-8601 form. AC-21c end-to-end: pre-loop launch in transcript does not trigger the short-circuit; Codex runs. AC-22 wrapper without --session-id + no prior marker + pending bg -> writes marker, surfaces systemMessage. AC-22b wrapper without --session-id + prior marker -> silent ALLOW, marker and state preserved. Validation: - bash tests/test-stop-hook-bg-allow.sh -> 37 passed, 0 failed - bash tests/run-all-tests.sh -> 1716 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 37 passed, 0 failed systemMessage wording unchanged on existing paths. Version stays at 1.16.0. 
--- hooks/lib/loop-common.sh | 53 +++++++- hooks/loop-codex-stop-hook.sh | 44 +++++-- tests/test-stop-hook-bg-allow.sh | 215 +++++++++++++++++++++++++++++-- 3 files changed, 282 insertions(+), 30 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index b8037614..2b71e778 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -267,6 +267,31 @@ extract_transcript_path() { expand_leading_tilde "$raw" } +# Convert an RLCR loop dir basename to a lexically-comparable ISO-8601 +# timestamp suitable for filtering transcript events. +# +# The setup script creates loop dirs named `YYYY-MM-DD_HH-MM-SS`; real +# Claude transcript events carry timestamps like `2026-04-16T13:19:26.819Z`. +# String comparison works cleanly once we pad the loop boundary with +# `.000Z` so sub-second transcript timestamps in the same second always +# compare greater. +# +# Usage: derive_loop_start_iso_ts "$loop_dir" +# Prints the ISO-8601 timestamp, or empty string when the basename does +# not match the expected format. +derive_loop_start_iso_ts() { + local loop_dir="$1" + local base + base=$(basename "$loop_dir" 2>/dev/null || echo "") + if [[ "$base" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})-([0-9]{2})$ ]]; then + printf '%sT%s:%s:%s.000Z' \ + "${BASH_REMATCH[1]}" \ + "${BASH_REMATCH[2]}" \ + "${BASH_REMATCH[3]}" \ + "${BASH_REMATCH[4]}" + fi +} + # Enumerate background-task ids that have been launched but not yet marked # completed in a Claude Code transcript.jsonl. # @@ -290,7 +315,15 @@ extract_transcript_path() { # # pending := launched \ completed # -# Usage: list_pending_background_task_ids "$transcript_path" +# Optional second argument `since_ts` (ISO-8601 string, e.g. the value +# returned by `derive_loop_start_iso_ts`): when provided, only launch +# events whose top-level `.timestamp` field is >= `since_ts` count as +# candidate launches. 
Events without a `.timestamp` are included (keeps +# fixture transcripts and older record formats working). This keeps +# pre-loop session-wide background work from pinning an RLCR loop that +# has no pending work of its own. +# +# Usage: list_pending_background_task_ids "$transcript_path" [since_ts] # - Outputs one id per line on stdout (possibly empty). # - Returns 0 when the transcript is readable (including when there are # no pending tasks). Returns 1 when the transcript path is empty, not @@ -298,6 +331,7 @@ extract_transcript_path() { # as "unknown -> do not short-circuit". list_pending_background_task_ids() { local transcript_path="$1" + local since_ts="${2:-}" # Normalize a leading tilde so direct callers (tests, ad-hoc scripts) # work correctly even when transcript_path was not routed through @@ -312,8 +346,13 @@ list_pending_background_task_ids() { fi local launched completed - launched=$(jq -r ' + launched=$(jq -r --arg since_ts "$since_ts" ' select(.toolUseResult != null) + | select( + ($since_ts == "" + or ((.timestamp // "") == "") + or ((.timestamp // "") >= $since_ts)) + ) | select( (.toolUseResult.isAsync == true and (.toolUseResult.agentId // "") != "") or ((.toolUseResult.backgroundTaskId // "") != "") @@ -355,22 +394,24 @@ list_pending_background_task_ids() { # Returns 1 when no pending tasks are detected (including fail-closed cases # like missing transcript, non-file path, or jq unavailable). # -# Usage: has_pending_background_tasks "$transcript_path" +# Usage: has_pending_background_tasks "$transcript_path" [since_ts] has_pending_background_tasks() { local transcript_path="$1" + local since_ts="${2:-}" local pending - pending=$(list_pending_background_task_ids "$transcript_path" 2>/dev/null) || return 1 + pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || return 1 [[ -n "$pending" ]] } # Prints the count of pending background tasks to stdout. 
Prints 0 for any # error case so callers can still format messages safely. # -# Usage: count_pending_background_tasks "$transcript_path" +# Usage: count_pending_background_tasks "$transcript_path" [since_ts] count_pending_background_tasks() { local transcript_path="$1" + local since_ts="${2:-}" local pending - pending=$(list_pending_background_task_ids "$transcript_path" 2>/dev/null) || { + pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || { echo 0 return 0 } diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index d0d3af30..405c82ef 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -68,6 +68,27 @@ if [[ -z "$LOOP_DIR" ]]; then exit 0 fi +# Shared state used by both guard blocks and the pending-tasks check below. +# Loop-start boundary: derived from the loop dir basename (`YYYY-MM-DD_HH-MM-SS`). +# Empty means derivation failed; helpers treat empty since_ts as no boundary. +LOOP_START_TS=$(derive_loop_start_iso_ts "$LOOP_DIR") +HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") + +# ======================================== +# Ambiguous-Caller Marker Guard +# ======================================== +# If a bg-pending.marker is present but we have no session_id on this +# hook invocation (typical of scripts/rlcr-stop-gate.sh invoked without +# --session-id, or any other caller that doesn't forward session_id), +# we cannot tell whether this caller owns the parked loop. Taking either +# branch (foreign-session guard below, or same-session cleanup further +# down) would be wrong in one of the two possible realities. Exit 0 +# silently: the real Claude hook will arrive with session_id populated +# and drive parking / cleanup from an authoritative context. 
+if [[ -f "$LOOP_DIR/bg-pending.marker" ]] && [[ -z "$HOOK_SESSION_ID" ]]; then + exit 0 +fi + # ======================================== # Cross-Session Parked-Loop Guard # ======================================== @@ -77,18 +98,17 @@ fi # transcript sees none of the foreign bg activity - so the only safe # response is to exit 0 with a distinct systemMessage and leave every # on-disk artifact (state file, stored session_id, marker) untouched. -HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") +# +# Both sides of the session-id comparison must be non-empty for this +# branch to trigger: an empty HOOK_SESSION_ID has already exited above +# via the ambiguous-caller guard, and an empty stored session_id keeps +# the backward-compat "matches any" semantics from find_active_loop. if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then GUARD_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") if [[ -n "$GUARD_STATE_FILE" ]]; then GUARD_STORED_SID=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$GUARD_STATE_FILE" 2>/dev/null | tr -d ' ') - # Non-empty stored session_id that differs from the caller's session - # (empty or not) means this is a foreign parked loop. Hook-input - # schemas that omit session_id -- such as rlcr-stop-gate.sh invoked - # without --session-id -- still get a mismatch here and take the - # safe exit path. An empty stored session_id keeps the existing - # backward-compat "matches any" semantics from find_active_loop. if [[ -n "$GUARD_STORED_SID" ]] \ + && [[ -n "$HOOK_SESSION_ID" ]] \ && [[ "$GUARD_STORED_SID" != "$HOOK_SESSION_ID" ]]; then jq -n \ '{systemMessage: "RLCR loop in this repo is parked by another Claude session waiting for background work. Stop allowed; your session leaves the loop untouched. 
If that session ended, run /humanize:cancel-rlcr-loop to clean up."}' @@ -111,10 +131,14 @@ fi # untouched -- the next natural stop (after background work finishes) will # re-enter this hook with no pending tasks and run the normal flow. # +# LOOP_START_TS confines the transcript scan to launches that actually +# happened during this loop; earlier session-wide bg activity cannot pin +# the loop. +# # This check MUST run before any other gate (phase detection, state parsing, # branch / plan / git-clean / summary / max-iter checks, Codex review). -if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH"; then - PENDING_BG_COUNT=$(count_pending_background_tasks "$HOOK_TRANSCRIPT_PATH") +if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS"; then + PENDING_BG_COUNT=$(count_pending_background_tasks "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS") # Mark the loop as parked; allows the same session to resume later and # makes the cross-session guard above reachable if the user opens a # different Claude session in this repo before the bg task completes. @@ -142,7 +166,7 @@ fi # The check uses a single fresh call so we capture both the exit code # and the emptiness without double-running jq. if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then - if PENDING_BG_CHECK=$(list_pending_background_task_ids "$HOOK_TRANSCRIPT_PATH" 2>/dev/null) \ + if PENDING_BG_CHECK=$(list_pending_background_task_ids "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS" 2>/dev/null) \ && [[ -z "$PENDING_BG_CHECK" ]]; then rm -f "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true fi diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 930d6ffc..9fe2ba5b 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -541,12 +541,27 @@ fi # Verify the gate wrapper path with a tilde-form --transcript-path also # reaches the short-circuit. AC-9 uses an absolute transcript path; this # covers the same code path with a "~/..." form. 
+# +# Fresh fixture so the repo has no prior bg-pending.marker (AC-10 left +# one behind). The ambiguous-caller guard in the hook only silences the +# wrapper when a marker already exists; a clean repo falls through to +# the normal short-circuit so the systemMessage surfaces in the wrapper +# output. echo "Test AC-10c: rlcr-stop-gate.sh with '~/...' --transcript-path -> ALLOW" +AC10C_REPO="$TEST_DIR/ac10c" +create_full_fixture "$AC10C_REPO" > /dev/null +mkdir -p "$FAKE_HOME/session-data-c" +AC10C_TRANSCRIPT="$FAKE_HOME/session-data-c/ac10c.jsonl" +AC10C_LAUNCH=$(emit_tool_use_assistant "toolu_H2" "Agent" ',"description":"x","prompt":"x"') +AC10C_RESULT=$(emit_async_agent_launch_result "toolu_H2" "agent_pending_H2") +write_transcript "$AC10C_TRANSCRIPT" "$AC10C_LAUNCH" "$AC10C_RESULT" +AC10C_TILDE_PATH="~/session-data-c/ac10c.jsonl" + AC10C_OUT="$TEST_DIR/ac10c-out.txt" set +e ( - cd "$AC10_REPO" - HOME="$FAKE_HOME" "$GATE_SCRIPT" --transcript-path "$AC10_TILDE_PATH" + cd "$AC10C_REPO" + HOME="$FAKE_HOME" "$GATE_SCRIPT" --transcript-path "$AC10C_TILDE_PATH" ) > "$AC10C_OUT" 2>&1 AC10C_EXIT=$? set -e @@ -1053,11 +1068,14 @@ else fi # ---------------- AC-19 ---------------- -# Empty-session caller must still be treated as "foreign" for a parked -# loop whose stored session_id is non-empty. Real trigger: callers such -# as scripts/rlcr-stop-gate.sh invoked without --session-id reach the -# hook with no session_id key at all. -echo "Test AC-19: cross-session guard fires when hook input omits session_id" +# Empty-session caller + bg-pending.marker present: the caller might be +# the parked loop's owner invoking through a wrapper that didn't forward +# session_id, OR it might be a different session. The hook cannot tell +# them apart from the input, so the safe response is `exit 0` silently +# with no systemMessage and no on-disk mutation. The real Claude stop +# hook (which always has session_id populated) drives actual parking and +# cleanup. 
+echo "Test AC-19: ambiguous caller (empty session_id + marker) exits silently" AC19_REPO="$TEST_DIR/ac19" AC19_LOOP=$(create_full_fixture "$AC19_REPO") AC19_STATE="$AC19_LOOP/state.md" @@ -1087,9 +1105,6 @@ EOF_AC19 AC19_STATE_HASH_BEFORE=$(sha256sum "$AC19_STATE" | awk '{print $1}') : > "$AC19_MARKER" -# Transcript exists and is a readable, well-formed minimal record. The -# guard must rely on the stored-vs-current session_id mismatch alone, -# not on transcript readability, to detect the foreign-session case. AC19_TRANSCRIPT="$TRANSCRIPTS_DIR/ac19.jsonl" write_transcript "$AC19_TRANSCRIPT" '{"type":"user","message":{"role":"user","content":"hello"}}' @@ -1103,11 +1118,11 @@ if [[ "$RUN_EXIT_CODE" -eq 0 ]] \ && [[ ! -f "$RUN_MARKER" ]] \ && [[ -f "$AC19_MARKER" ]] \ && [[ "$AC19_STATE_HASH_BEFORE" == "$AC19_STATE_HASH_AFTER" ]] \ - && printf '%s' "$AC19_SYS_MSG" | grep -qi "parked"; then - pass "AC-19: empty hook session_id triggers 'parked' guard; marker and state preserved" + && [[ -z "$AC19_SYS_MSG" ]]; then + pass "AC-19: ambiguous caller exits silently; marker and state.md preserved" else - fail "AC-19: empty hook session_id triggers 'parked' guard; marker and state preserved" \ - "exit 0 + systemMessage matches /parked/ + marker stays + state.md byte-identical + no Codex" \ + fail "AC-19: ambiguous caller exits silently; marker and state.md preserved" \ + "exit 0 + no systemMessage + marker stays + state.md byte-identical + no Codex" \ "exit $RUN_EXIT_CODE, codex_marker=$(test -f "$RUN_MARKER" && echo present || echo missing), bg_marker=$(test -f "$AC19_MARKER" && echo present || echo missing), state_unchanged=$([[ "$AC19_STATE_HASH_BEFORE" == "$AC19_STATE_HASH_AFTER" ]] && echo yes || echo no), systemMessage='$AC19_SYS_MSG'; output: $RUN_OUTPUT" fi @@ -1161,5 +1176,177 @@ else "marker was removed" fi +# ---------------- AC-21 ---------------- +# Transcript scan boundary: the Claude transcript is session-wide and +# can contain background launches that 
predate the RLCR loop. The +# helper filters launch events by `.timestamp >= since_ts` (derived +# from the loop dir basename) so only launches made after the loop +# started count as pending. +echo "Test AC-21: pre-loop launches are filtered out by since_ts" +AC21_TRANSCRIPT="$TRANSCRIPTS_DIR/ac21.jsonl" + +# The loop boundary used throughout the suite's fixtures is +# 2026-03-01 00:00:00. Build two launches: one BEFORE that boundary +# (should be filtered) and one AFTER (should still count as pending). +AC21_PRE_LAUNCH=$(jq -c -n '{ + type:"user", + timestamp:"2026-02-28T10:00:00.000Z", + toolUseResult:{isAsync:true, agentId:"agent_pre_loop"} +}') +AC21_POST_LAUNCH=$(jq -c -n '{ + type:"user", + timestamp:"2026-03-01T10:00:00.000Z", + toolUseResult:{isAsync:true, agentId:"agent_in_loop"} +}') +write_transcript "$AC21_TRANSCRIPT" "$AC21_PRE_LAUNCH" "$AC21_POST_LAUNCH" + +AC21_SINCE="2026-03-01T00:00:00.000Z" +AC21_FILTERED=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + list_pending_background_task_ids "$AC21_TRANSCRIPT" "$AC21_SINCE" 2>/dev/null | sort -u +) +if [[ "$AC21_FILTERED" == "agent_in_loop" ]]; then + pass "AC-21: list_pending_background_task_ids filters launches before since_ts" +else + fail "AC-21: list_pending_background_task_ids filters launches before since_ts" \ + "only 'agent_in_loop' (pre-loop launch excluded)" "got: $AC21_FILTERED" +fi + +# AC-21b: confirm the derive helper produces the expected ISO-8601 form +# so real callers get a matching boundary. 
+AC21B_DERIVED=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_00-00-00" +) +if [[ "$AC21B_DERIVED" == "2026-03-01T00:00:00.000Z" ]]; then + pass "AC-21b: derive_loop_start_iso_ts emits ISO-8601 with .000Z suffix" +else + fail "AC-21b: derive_loop_start_iso_ts emits ISO-8601 with .000Z suffix" \ + "2026-03-01T00:00:00.000Z" "$AC21B_DERIVED" +fi + +# AC-21c: end-to-end through the stop hook. Pre-loop launch only -> hook +# must NOT short-circuit (no pending bg "belongs" to this loop). +echo "Test AC-21c: stop hook ignores pre-loop launches for this loop" +AC21C_REPO="$TEST_DIR/ac21c" +AC21C_LOOP=$(create_full_fixture "$AC21C_REPO") +AC21C_MARKER="$AC21C_LOOP/bg-pending.marker" +AC21C_TRANSCRIPT="$TRANSCRIPTS_DIR/ac21c.jsonl" +write_transcript "$AC21C_TRANSCRIPT" "$AC21_PRE_LAUNCH" +AC21C_INPUT=$(jq -c -n --arg tp "$AC21C_TRANSCRIPT" \ + '{transcript_path:$tp, session_id:"session_home"}') +run_stop_hook_with_input "$AC21C_REPO" "$AC21C_INPUT" + +# With the pre-loop launch filtered out, the transcript has no in-loop +# pending bg -> no short-circuit -> no marker written -> hook proceeds +# to the normal flow (which will call Codex in this fixture). +if [[ ! -f "$AC21C_MARKER" ]] && [[ -f "$RUN_MARKER" ]]; then + pass "AC-21c: pre-loop launch does not write bg-pending.marker; Codex runs" +else + fail "AC-21c: pre-loop launch does not write bg-pending.marker; Codex runs" \ + "no bg marker AND Codex invoked" \ + "bg_marker=$(test -f "$AC21C_MARKER" && echo present || echo missing); codex_marker=$(test -f "$RUN_MARKER" && echo present || echo missing)" +fi + +# ---------------- AC-22 ---------------- +# Wrapper without --session-id on a repo that has NO marker: should +# behave just like the normal same-session path, i.e. a pending bg in +# the transcript writes the marker and the wrapper output surfaces the +# "background task" systemMessage. 
This confirms the ambiguous-caller +# guard only fires on a pre-existing marker, not on every no-session +# call. +echo "Test AC-22: wrapper without session_id, no prior marker, pending bg -> ALLOW with systemMessage" +AC22_REPO="$TEST_DIR/ac22" +create_full_fixture "$AC22_REPO" > /dev/null +AC22_LOOP="$AC22_REPO/.humanize/rlcr/2026-03-01_00-00-00" +AC22_MARKER="$AC22_LOOP/bg-pending.marker" +AC22_TRANSCRIPT="$TRANSCRIPTS_DIR/ac22.jsonl" +AC22_LAUNCH=$(jq -c -n '{ + type:"user", + timestamp:"2026-03-01T10:00:00.000Z", + toolUseResult:{isAsync:true, agentId:"agent_wrapper_pending"} +}') +write_transcript "$AC22_TRANSCRIPT" "$AC22_LAUNCH" + +AC22_OUT="$TEST_DIR/ac22-out.txt" +set +e +( + cd "$AC22_REPO" + "$GATE_SCRIPT" --transcript-path "$AC22_TRANSCRIPT" +) > "$AC22_OUT" 2>&1 +AC22_EXIT=$? +set -e + +if [[ "$AC22_EXIT" -eq 0 ]] \ + && grep -q "^ALLOW:" "$AC22_OUT" \ + && grep -q "background task" "$AC22_OUT" \ + && [[ -f "$AC22_MARKER" ]]; then + pass "AC-22: wrapper without session_id + no prior marker + pending bg -> writes marker, surfaces systemMessage" +else + AC22_BODY=$(cat "$AC22_OUT" 2>/dev/null || true) + fail "AC-22: wrapper without session_id + no prior marker + pending bg -> writes marker, surfaces systemMessage" \ + "exit 0 + ALLOW + 'background task' + marker written" \ + "exit $AC22_EXIT; marker=$(test -f "$AC22_MARKER" && echo present || echo missing); output: $AC22_BODY" +fi + +# AC-22b: wrapper without --session-id on a repo that ALREADY has a +# marker (e.g. set up by a prior hook call). Must exit 0 silently -- no +# systemMessage, no state mutation. Mirrors the real scenario Codex +# flagged: rlcr-stop-gate.sh re-run by an unaware caller. 
+echo "Test AC-22b: wrapper without session_id, prior marker -> silent ALLOW" +AC22B_REPO="$TEST_DIR/ac22b" +AC22B_LOOP=$(create_full_fixture "$AC22B_REPO") +AC22B_STATE="$AC22B_LOOP/state.md" +AC22B_MARKER="$AC22B_LOOP/bg-pending.marker" +AC22B_BRANCH=$(git -C "$AC22B_REPO" rev-parse --abbrev-ref HEAD) +AC22B_BASE_COMMIT=$(git -C "$AC22B_REPO" rev-parse HEAD) +cat > "$AC22B_STATE" < "$AC22B_MARKER" + +AC22B_OUT="$TEST_DIR/ac22b-out.txt" +set +e +( + cd "$AC22B_REPO" + "$GATE_SCRIPT" +) > "$AC22B_OUT" 2>&1 +AC22B_EXIT=$? +set -e + +AC22B_STATE_HASH_AFTER=$(sha256sum "$AC22B_STATE" | awk '{print $1}') +if [[ "$AC22B_EXIT" -eq 0 ]] \ + && grep -q "^ALLOW:" "$AC22B_OUT" \ + && ! grep -qi "parked" "$AC22B_OUT" \ + && [[ -f "$AC22B_MARKER" ]] \ + && [[ "$AC22B_STATE_HASH_BEFORE" == "$AC22B_STATE_HASH_AFTER" ]]; then + pass "AC-22b: wrapper without session_id + existing marker -> silent ALLOW; marker and state preserved" +else + AC22B_BODY=$(cat "$AC22B_OUT" 2>/dev/null || true) + fail "AC-22b: wrapper without session_id + existing marker -> silent ALLOW; marker and state preserved" \ + "exit 0 + ALLOW: (no 'parked') + marker kept + state.md byte-identical" \ + "exit $AC22B_EXIT; marker=$(test -f "$AC22B_MARKER" && echo present || echo missing); state_unchanged=$([[ "$AC22B_STATE_HASH_BEFORE" == "$AC22B_STATE_HASH_AFTER" ]] && echo yes || echo no); output: $AC22B_BODY" +fi + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? From 9c80698f1354d6a1fc4b762acb2328a4526eb00f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 15:34:59 -0700 Subject: [PATCH 65/97] Normalize loop-start boundary into actual UTC setup-rlcr-loop.sh creates loop dirs named YYYY-MM-DD_HH-MM-SS with `date +%Y-%m-%d_%H-%M-%S` (no -u), so the basename is local wall clock. The previous derive_loop_start_iso_ts pasted `.000Z` on top of that basename, treating the local wall clock as if it were already UTC. 
Claude transcript events carry real UTC timestamps, so on any non-UTC machine the filter boundary was shifted by the local offset: west-of-UTC users inherited pre-loop background launches and stayed parked forever, east-of-UTC users missed in-loop background work and ran Codex review too early. Fix in hooks/lib/loop-common.sh: * derive_loop_start_iso_ts now does a two-step local -> epoch -> UTC conversion. Local wall clock is parsed into epoch seconds with `date -d` (GNU) or `date -j -f` (BSD/macOS), then the epoch is formatted in UTC as YYYY-MM-DDTHH:MM:SS.000Z with `date -u -d "@$epoch"` (GNU) or `date -u -r "$epoch"` (BSD/macOS). Any failure yields an empty string, which disables the filter in callers -- same backward-compat behaviour as before. Regressions in tests/test-stop-hook-bg-allow.sh: AC-21b pinned to `export TZ=UTC` inside its subshell so the expected 2026-03-01T00:00:00.000Z is TZ-deterministic. AC-21d NEW. TZ=Asia/Tokyo + basename 2026-03-01_09-00-00 -> 2026-03-01T00:00:00.000Z (9am JST = 0am UTC). AC-21e NEW. TZ=America/Los_Angeles + basename 2026-03-01_00-00-00 -> 2026-03-01T08:00:00.000Z (0am PST = 8am UTC; March 1 is before DST starts on March 8, 2026). Validation: - bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - bash tests/run-all-tests.sh -> 1718 passed, 0 failed - TZ=America/Los_Angeles bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - TZ=Asia/Tokyo bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed systemMessage wording unchanged. Version stays at 1.16.0. 
--- hooks/lib/loop-common.sh | 52 +++++++++++++++++++++++--------- tests/test-stop-hook-bg-allow.sh | 39 ++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 2b71e778..7e18df9b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -268,28 +268,52 @@ extract_transcript_path() { } # Convert an RLCR loop dir basename to a lexically-comparable ISO-8601 -# timestamp suitable for filtering transcript events. +# UTC timestamp suitable for filtering transcript events. # -# The setup script creates loop dirs named `YYYY-MM-DD_HH-MM-SS`; real -# Claude transcript events carry timestamps like `2026-04-16T13:19:26.819Z`. -# String comparison works cleanly once we pad the loop boundary with -# `.000Z` so sub-second transcript timestamps in the same second always -# compare greater. +# `setup-rlcr-loop.sh` creates loop dirs named `YYYY-MM-DD_HH-MM-SS` in +# the system's LOCAL wall clock (it calls `date +%Y-%m-%d_%H-%M-%S` +# without `-u`). Claude transcript events carry actual UTC timestamps +# like `2026-04-16T13:19:26.819Z`. To compare them correctly, this +# helper converts the local wall-clock parse back to a real UTC moment +# via a two-step: parse local -> epoch seconds -> format in UTC. +# +# The `.000Z` suffix keeps sub-second transcript timestamps in the same +# second compared greater via lexical string ordering. # # Usage: derive_loop_start_iso_ts "$loop_dir" -# Prints the ISO-8601 timestamp, or empty string when the basename does -# not match the expected format. +# Prints the ISO-8601 UTC timestamp, or empty string when the +# basename does not match the expected format or the local `date` +# binary cannot parse it. 
derive_loop_start_iso_ts() { local loop_dir="$1" local base base=$(basename "$loop_dir" 2>/dev/null || echo "") - if [[ "$base" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})-([0-9]{2})$ ]]; then - printf '%sT%s:%s:%s.000Z' \ - "${BASH_REMATCH[1]}" \ - "${BASH_REMATCH[2]}" \ - "${BASH_REMATCH[3]}" \ - "${BASH_REMATCH[4]}" + if [[ ! "$base" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})-([0-9]{2})$ ]]; then + return + fi + local local_datetime + local_datetime="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}:${BASH_REMATCH[3]}:${BASH_REMATCH[4]}" + + # Local wall-clock -> epoch seconds. GNU `date -d` first, + # BSD/macOS `date -j -f ...` second. Both honour the caller's TZ + # for interpretation, matching setup-rlcr-loop.sh's behaviour at + # loop-dir creation time. + local epoch + epoch=$(date -d "$local_datetime" +%s 2>/dev/null) || epoch="" + if [[ -z "$epoch" ]]; then + epoch=$(date -j -f "%Y-%m-%d %H:%M:%S" "$local_datetime" +%s 2>/dev/null) || epoch="" + fi + if [[ -z "$epoch" ]]; then + return + fi + + # Epoch -> UTC ISO-8601. Try GNU then BSD. + local utc_iso + utc_iso=$(date -u -d "@$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" + if [[ -z "$utc_iso" ]]; then + utc_iso=$(date -u -r "$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" fi + printf '%s' "$utc_iso" } # Enumerate background-task ids that have been launched but not yet marked diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 9fe2ba5b..22d0a1dc 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -1214,19 +1214,52 @@ else fi # AC-21b: confirm the derive helper produces the expected ISO-8601 form -# so real callers get a matching boundary. +# under TZ=UTC, where local wall clock == UTC so no offset is applied. 
AC21B_DERIVED=$( # shellcheck source=/dev/null source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + export TZ="UTC" derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_00-00-00" ) if [[ "$AC21B_DERIVED" == "2026-03-01T00:00:00.000Z" ]]; then - pass "AC-21b: derive_loop_start_iso_ts emits ISO-8601 with .000Z suffix" + pass "AC-21b: derive_loop_start_iso_ts under TZ=UTC preserves the wall-clock" else - fail "AC-21b: derive_loop_start_iso_ts emits ISO-8601 with .000Z suffix" \ + fail "AC-21b: derive_loop_start_iso_ts under TZ=UTC preserves the wall-clock" \ "2026-03-01T00:00:00.000Z" "$AC21B_DERIVED" fi +# AC-21d: setup-rlcr-loop.sh names the dir with local wall clock, so a +# non-UTC caller must see the boundary shifted into actual UTC. +# JST (UTC+9) example: 09:00 JST == 00:00 UTC. +AC21D_DERIVED=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + export TZ="Asia/Tokyo" + derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_09-00-00" +) +if [[ "$AC21D_DERIVED" == "2026-03-01T00:00:00.000Z" ]]; then + pass "AC-21d: derive_loop_start_iso_ts converts JST wall-clock to correct UTC" +else + fail "AC-21d: derive_loop_start_iso_ts converts JST wall-clock to correct UTC" \ + "2026-03-01T00:00:00.000Z (9am JST = 0am UTC)" "$AC21D_DERIVED" +fi + +# AC-21e: PST (UTC-8) example. Pick March 1 which is still PST (DST +# does not start until March 8, 2026), so the offset is a fixed -8h: +# 00:00 PST == 08:00 UTC. 
+AC21E_DERIVED=$( + # shellcheck source=/dev/null + source "$PROJECT_ROOT/hooks/lib/loop-common.sh" + export TZ="America/Los_Angeles" + derive_loop_start_iso_ts "/tmp/.humanize/rlcr/2026-03-01_00-00-00" +) +if [[ "$AC21E_DERIVED" == "2026-03-01T08:00:00.000Z" ]]; then + pass "AC-21e: derive_loop_start_iso_ts converts PST wall-clock to correct UTC" +else + fail "AC-21e: derive_loop_start_iso_ts converts PST wall-clock to correct UTC" \ + "2026-03-01T08:00:00.000Z (0am PST = 8am UTC before DST)" "$AC21E_DERIVED" +fi + # AC-21c: end-to-end through the stop hook. Pre-loop launch only -> hook # must NOT short-circuit (no pending bg "belongs" to this loop). echo "Test AC-21c: stop hook ignores pre-loop launches for this loop" From 74671f783ea362744ec22b8eaa4f0b75a491715b Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 15:45:24 -0700 Subject: [PATCH 66/97] Pin rlcr-stop-gate project root at every wrapper testsite tests/test-stop-hook-bg-allow.sh invokes rlcr-stop-gate.sh at four spots (AC-9, AC-10c, AC-22, AC-22b). The wrapper resolves its project root as `${CLAUDE_PROJECT_DIR:-$(pwd)}`, giving the env var precedence over `cd`. When the outer runner exports CLAUDE_PROJECT_DIR (the normal case in hosted environments), those four tests were inspecting the outer repo instead of their per-test fixtures, falling through with "ALLOW: stop gate passed." and causing the suite to go red inside tests/run-all-tests.sh. Reproduced directly: CLAUDE_PROJECT_DIR=/tmp/outer-unrelated \ bash tests/test-stop-hook-bg-allow.sh ... FAIL: AC-10c FAIL: AC-22 Fix: pass `--project-root "$FIXTURE_REPO"` at every wrapper call site. The wrapper priority order is explicit flag > CLAUDE_PROJECT_DIR env > cwd, so the gate now pins deterministically to each fixture regardless of inherited environment. No product-code change. 
Validation: - bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - bash tests/run-all-tests.sh -> 1718 passed, 0 failed - CLAUDE_PROJECT_DIR=/tmp/outer-unrelated bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - HOME=/nonexistent/readonly bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - TZ=America/Los_Angeles bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - TZ=Asia/Tokyo bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed systemMessage wording unchanged. Version stays at 1.16.0. --- tests/test-stop-hook-bg-allow.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 22d0a1dc..ab9fe571 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -476,10 +476,12 @@ AC9_RESULT=$(emit_async_agent_launch_result "toolu_G" "agent_pending_G") write_transcript "$AC9_TRANSCRIPT" "$AC9_LAUNCH" "$AC9_RESULT" AC9_OUT="$AC9_REPO/gate-out.txt" +# Pass --project-root explicitly so an inherited CLAUDE_PROJECT_DIR +# from the outer runner cannot redirect the gate to the outer repo. set +e ( cd "$AC9_REPO" - "$GATE_SCRIPT" --transcript-path "$AC9_TRANSCRIPT" + "$GATE_SCRIPT" --project-root "$AC9_REPO" --transcript-path "$AC9_TRANSCRIPT" ) > "$AC9_OUT" 2>&1 AC9_EXIT=$? set -e @@ -561,7 +563,9 @@ AC10C_OUT="$TEST_DIR/ac10c-out.txt" set +e ( cd "$AC10C_REPO" - HOME="$FAKE_HOME" "$GATE_SCRIPT" --transcript-path "$AC10C_TILDE_PATH" + HOME="$FAKE_HOME" "$GATE_SCRIPT" \ + --project-root "$AC10C_REPO" \ + --transcript-path "$AC10C_TILDE_PATH" ) > "$AC10C_OUT" 2>&1 AC10C_EXIT=$? set -e @@ -1307,7 +1311,7 @@ AC22_OUT="$TEST_DIR/ac22-out.txt" set +e ( cd "$AC22_REPO" - "$GATE_SCRIPT" --transcript-path "$AC22_TRANSCRIPT" + "$GATE_SCRIPT" --project-root "$AC22_REPO" --transcript-path "$AC22_TRANSCRIPT" ) > "$AC22_OUT" 2>&1 AC22_EXIT=$? 
set -e @@ -1362,7 +1366,7 @@ AC22B_OUT="$TEST_DIR/ac22b-out.txt" set +e ( cd "$AC22B_REPO" - "$GATE_SCRIPT" + "$GATE_SCRIPT" --project-root "$AC22B_REPO" ) > "$AC22B_OUT" 2>&1 AC22B_EXIT=$? set -e From 39f09c47d7089bbea4f3df75977ab42c5312bb66 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 15:55:02 -0700 Subject: [PATCH 67/97] Avoid duplicate jq pass in bg-pending short-circuit hooks/loop-codex-stop-hook.sh previously called has_pending_background_tasks AND count_pending_background_tasks back-to-back on the short-circuit path. Both wrap list_pending_background_task_ids, so every pending-bg stop was running jq over the transcript twice. Collapse into a single list_pending_background_task_ids call: capture the id list once, use its non-emptiness for the short-circuit decision, and derive the count with `wc -l`. The public helpers has_pending_background_tasks and count_pending_background_tasks stay for other callers. No behaviour change. systemMessage wording and exit codes identical. Validation (all six suites green): - bash tests/run-all-tests.sh -> 1718 passed, 0 failed - bash tests/test-stop-hook-bg-allow.sh -> 39 passed, 0 failed - HOME=/nonexistent/readonly ... -> 39 passed, 0 failed - TZ=America/Los_Angeles ... -> 39 passed, 0 failed - TZ=Asia/Tokyo ... -> 39 passed, 0 failed - CLAUDE_PROJECT_DIR=/tmp/outer-unrelated ... -> 39 passed, 0 failed Version stays at 1.16.0. --- hooks/loop-codex-stop-hook.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 405c82ef..6779f94e 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -137,8 +137,9 @@ fi # # This check MUST run before any other gate (phase detection, state parsing, # branch / plan / git-clean / summary / max-iter checks, Codex review). 
-if has_pending_background_tasks "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS"; then - PENDING_BG_COUNT=$(count_pending_background_tasks "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS") +PENDING_BG_IDS=$(list_pending_background_task_ids "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS" 2>/dev/null) || true +if [[ -n "$PENDING_BG_IDS" ]]; then + PENDING_BG_COUNT=$(printf '%s\n' "$PENDING_BG_IDS" | sed '/^$/d' | wc -l | tr -d ' ') # Mark the loop as parked; allows the same session to resume later and # makes the cross-session guard above reachable if the user opens a # different Claude session in this repo before the bg task completes. From 3711e5fd9059584c7bf98cf1d19ee02dcf5bef48 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 16:47:36 -0700 Subject: [PATCH 68/97] Extract bg-task helpers into dedicated lib and guard transcript_path survival Move the six background-task helpers (expand_leading_tilde, extract_transcript_path, derive_loop_start_iso_ts, list/has/count_pending_background_task[_ids]) and the four hook guard blocks (ambiguous caller, cross-session parked, pending-bg short-circuit, stale-marker cleanup) out of hooks/lib/loop-common.sh and hooks/loop-codex-stop-hook.sh into a new hooks/lib/loop-bg-tasks.sh. The stop hook now delegates to a single handle_bg_task_short_circuit entry point; loop-common.sh sources the new lib so every existing consumer continues to see the helpers transparently. Add a regression test to tests/test-stop-gate.sh that asserts transcript_path is still forwarded to the hook when session_id is empty, freezing the jq object-collapse fix that replaced plain select(length > 0) field values with explicit if/then/else nulls. 
--- hooks/lib/loop-bg-tasks.sh | 363 ++++++++++++++++++++++++++++++++++ hooks/lib/loop-common.sh | 220 ++------------------- hooks/loop-codex-stop-hook.sh | 115 ++--------- tests/test-stop-gate.sh | 75 +++++++ 4 files changed, 467 insertions(+), 306 deletions(-) create mode 100755 hooks/lib/loop-bg-tasks.sh diff --git a/hooks/lib/loop-bg-tasks.sh b/hooks/lib/loop-bg-tasks.sh new file mode 100755 index 00000000..116e9e36 --- /dev/null +++ b/hooks/lib/loop-bg-tasks.sh @@ -0,0 +1,363 @@ +#!/usr/bin/env bash +# +# Background-task helpers for the RLCR stop hook. +# +# Owns all logic that inspects the Claude Code transcript to decide +# whether the hook should short-circuit (the main session is still +# waiting on an asynchronous Agent/Bash dispatch), plus the four guard +# blocks that the stop hook runs before its normal gate logic: +# +# 1. Ambiguous-caller marker guard +# 2. Cross-session parked-loop guard +# 3. Early exit: pending background tasks +# 4. Same-session stale-marker cleanup +# +# Depends on loop-common.sh (FIELD_SESSION_ID, resolve_active_state_file) +# being sourced first. +# + +# Source guard. +[[ -n "${_LOOP_BG_TASKS_LOADED:-}" ]] && return 0 2>/dev/null || true +_LOOP_BG_TASKS_LOADED=1 + +# Expand a leading "~" or "~/" in a path to "$HOME" without using eval. +# Only the bare "~" and "~/..." forms are expanded; "~user/..." and every +# other input (absolute path, relative path, empty string) is returned verbatim. +# +# Usage: expand_leading_tilde "$path" +# Prints the normalized path to stdout. +expand_leading_tilde() { + local path="$1" + case "$path" in + '~') printf '%s' "${HOME:-}" ;; + '~/'*) printf '%s/%s' "${HOME:-}" "${path#'~/'}" ;; + *) printf '%s' "$path" ;; + esac +} + +# Extract transcript_path from hook JSON input and expand any leading tilde. +# Usage: extract_transcript_path "$json_input" +# Outputs the transcript_path to stdout, or empty string if not available. 
+extract_transcript_path() { + local input="$1" + local raw + raw=$(printf '%s' "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") + expand_leading_tilde "$raw" +} + +# Convert an RLCR loop dir basename to a lexically-comparable ISO-8601 +# UTC timestamp suitable for filtering transcript events. +# +# `setup-rlcr-loop.sh` creates loop dirs named `YYYY-MM-DD_HH-MM-SS` in +# the system's LOCAL wall clock (it calls `date +%Y-%m-%d_%H-%M-%S` +# without `-u`). Claude transcript events carry actual UTC timestamps +# like `2026-04-16T13:19:26.819Z`. To compare them correctly, this +# helper converts the local wall-clock parse back to a real UTC moment +# via a two-step: parse local -> epoch seconds -> format in UTC. +# +# The `.000Z` suffix keeps sub-second transcript timestamps in the same +# second compared greater via lexical string ordering. +# +# Usage: derive_loop_start_iso_ts "$loop_dir" +# Prints the ISO-8601 UTC timestamp, or empty string when the +# basename does not match the expected format or the local `date` +# binary cannot parse it. +derive_loop_start_iso_ts() { + local loop_dir="$1" + local base + base=$(basename "$loop_dir" 2>/dev/null || echo "") + if [[ ! "$base" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})-([0-9]{2})$ ]]; then + return + fi + local local_datetime + local_datetime="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}:${BASH_REMATCH[3]}:${BASH_REMATCH[4]}" + + # Local wall-clock -> epoch seconds. GNU `date -d` first, + # BSD/macOS `date -j -f ...` second. Both honour the caller's TZ + # for interpretation, matching setup-rlcr-loop.sh's behaviour at + # loop-dir creation time. + local epoch + epoch=$(date -d "$local_datetime" +%s 2>/dev/null) || epoch="" + if [[ -z "$epoch" ]]; then + epoch=$(date -j -f "%Y-%m-%d %H:%M:%S" "$local_datetime" +%s 2>/dev/null) || epoch="" + fi + if [[ -z "$epoch" ]]; then + return + fi + + # Epoch -> UTC ISO-8601. Try GNU then BSD. 
+ local utc_iso + utc_iso=$(date -u -d "@$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" + if [[ -z "$utc_iso" ]]; then + utc_iso=$(date -u -r "$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" + fi + printf '%s' "$utc_iso" +} + +# Enumerate background-task ids that have been launched but not yet marked +# completed in a Claude Code transcript.jsonl. +# +# Launch events (inspected in tool_result "user" messages): +# - Background subagent: toolUseResult.isAsync == true +# -> id is toolUseResult.agentId +# - Background shell: toolUseResult.backgroundTaskId non-empty +# -> id is toolUseResult.backgroundTaskId +# +# Completion events are recognised from two Claude Code transcript forms: +# +# 1. Structured SDK record +# (see SDKTaskNotificationMessage in docs/typescript.md): +# `type == "system"`, `subtype == "task_notification"`, +# `task_id` is the completed id. Any `status` value +# (completed, failed, stopped, ...) is treated as terminal. +# +# 2. Legacy queue-operation enqueue whose `content` embeds a +# `<task-notification>` XML block with `<task-id>...</task-id>`; +# kept for transcripts produced by older Claude Code versions. +# +# pending := launched \ completed +# +# Optional second argument `since_ts` (ISO-8601 string, e.g. the value +# returned by `derive_loop_start_iso_ts`): when provided, only launch +# events whose top-level `.timestamp` field is >= `since_ts` count as +# candidate launches. Events without a `.timestamp` are included (keeps +# fixture transcripts and older record formats working). This keeps +# pre-loop session-wide background work from pinning an RLCR loop that +# has no pending work of its own. +# +# Usage: list_pending_background_task_ids "$transcript_path" [since_ts] +# - Outputs one id per line on stdout (possibly empty). +# - Returns 0 when the transcript is readable (including when there are +# no pending tasks). 
Returns 1 when the transcript path is empty, not +# a regular file, or jq is unavailable, so callers must treat non-zero +# as "unknown -> do not short-circuit". +list_pending_background_task_ids() { + local transcript_path="$1" + local since_ts="${2:-}" + + # Normalize a leading tilde so direct callers (tests, ad-hoc scripts) + # work correctly even when transcript_path was not routed through + # extract_transcript_path. + transcript_path=$(expand_leading_tilde "$transcript_path") + + if [[ -z "$transcript_path" ]] || [[ ! -f "$transcript_path" ]]; then + return 1 + fi + if ! command -v jq >/dev/null 2>&1; then + return 1 + fi + + local launched completed + launched=$(jq -r --arg since_ts "$since_ts" ' + select(.toolUseResult != null) + | select( + ($since_ts == "" + or ((.timestamp // "") == "") + or ((.timestamp // "") >= $since_ts)) + ) + | select( + (.toolUseResult.isAsync == true and (.toolUseResult.agentId // "") != "") + or ((.toolUseResult.backgroundTaskId // "") != "") + ) + | (.toolUseResult.agentId // .toolUseResult.backgroundTaskId) + ' "$transcript_path" 2>/dev/null | sort -u) || return 1 + + # Union of both completion formats. Either source alone is enough to + # mark a launched id terminal. + # + # The `grep -oE || true` guard on the legacy branch keeps `set -o + # pipefail` from poisoning the combined pipeline when no legacy + # queue-operation records exist in the transcript (grep with `-o` + # exits 1 on no matches, which would otherwise wipe out any SDK + # task_notification results collected above). 
+ completed=$( + { + jq -r ' + select(.type == "system" and .subtype == "task_notification") + | (.task_id // empty) + ' "$transcript_path" 2>/dev/null + jq -r ' + select(.type == "queue-operation" and .operation == "enqueue") + | (.content // "" | tostring) + | select(contains("")) + ' "$transcript_path" 2>/dev/null \ + | { grep -oE '[^<]+' || true; } \ + | sed -E 's|||g' + } | sort -u | sed '/^$/d' + ) || completed="" + + # Emit launched ids that have no matching completion notification. + comm -23 \ + <(printf '%s\n' "$launched" | sed '/^$/d') \ + <(printf '%s\n' "$completed" | sed '/^$/d') +} + +# Returns 0 when the transcript shows at least one pending background task. +# Returns 1 when no pending tasks are detected (including fail-closed cases +# like missing transcript, non-file path, or jq unavailable). +# +# Usage: has_pending_background_tasks "$transcript_path" [since_ts] +has_pending_background_tasks() { + local transcript_path="$1" + local since_ts="${2:-}" + local pending + pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || return 1 + [[ -n "$pending" ]] +} + +# Prints the count of pending background tasks to stdout. Prints 0 for any +# error case so callers can still format messages safely. +# +# Usage: count_pending_background_tasks "$transcript_path" [since_ts] +count_pending_background_tasks() { + local transcript_path="$1" + local since_ts="${2:-}" + local pending + pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || { + echo 0 + return 0 + } + if [[ -z "$pending" ]]; then + echo 0 + else + printf '%s\n' "$pending" | sed '/^$/d' | wc -l | tr -d ' ' + fi +} + +# Single entry point for the stop hook: runs the four guard blocks +# (ambiguous-caller, cross-session parked, pending-bg short-circuit, +# same-session stale-marker cleanup) in order. 
When a guard decides to +# short-circuit the stop hook, it emits the appropriate JSON on stdout +# and `exit 0`s directly; the caller (sourcing the hook script) never +# returns. When no guard fires, this function returns 0 and the stop +# hook continues into its normal gate logic. +# +# Depends on FIELD_SESSION_ID and resolve_active_state_file from +# loop-common.sh. +# +# Usage: handle_bg_task_short_circuit "$LOOP_DIR" "$HOOK_INPUT" "$HOOK_SESSION_ID" +handle_bg_task_short_circuit() { + local loop_dir="$1" hook_input="$2" hook_session_id="$3" + + # Shared state used by the guard blocks below. + # Loop-start boundary: derived from the loop dir basename + # (`YYYY-MM-DD_HH-MM-SS`). Empty means derivation failed; helpers + # treat empty since_ts as no boundary. + local loop_start_ts transcript_path + loop_start_ts=$(derive_loop_start_iso_ts "$loop_dir") + transcript_path=$(extract_transcript_path "$hook_input") + + # ---------------------------------------- + # Ambiguous-Caller Marker Guard + # ---------------------------------------- + # If a bg-pending.marker is present but we have no session_id on + # this hook invocation (typical of scripts/rlcr-stop-gate.sh + # invoked without --session-id, or any other caller that doesn't + # forward session_id), we cannot tell whether this caller owns the + # parked loop. Taking either branch (foreign-session guard below, + # or same-session cleanup further down) would be wrong in one of + # the two possible realities. Exit 0 silently: the real Claude + # hook will arrive with session_id populated and drive parking / + # cleanup from an authoritative context. + if [[ -f "$loop_dir/bg-pending.marker" ]] && [[ -z "$hook_session_id" ]]; then + exit 0 + fi + + # ---------------------------------------- + # Cross-Session Parked-Loop Guard + # ---------------------------------------- + # If find_active_loop handed this dir over via the marker fallback, + # the loop is parked by a different session waiting on a background + # task. 
The current session has no authority to inspect or advance + # that loop - its transcript sees none of the foreign bg activity - + # so the only safe response is to exit 0 with a distinct + # systemMessage and leave every on-disk artifact (state file, + # stored session_id, marker) untouched. + # + # Both sides of the session-id comparison must be non-empty for + # this branch to trigger: an empty hook_session_id has already + # exited above via the ambiguous-caller guard, and an empty stored + # session_id keeps the backward-compat "matches any" semantics + # from find_active_loop. + if [[ -f "$loop_dir/bg-pending.marker" ]]; then + local guard_state_file guard_stored_sid + guard_state_file=$(resolve_active_state_file "$loop_dir") + if [[ -n "$guard_state_file" ]]; then + guard_stored_sid=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$guard_state_file" 2>/dev/null | tr -d ' ') + if [[ -n "$guard_stored_sid" ]] \ + && [[ -n "$hook_session_id" ]] \ + && [[ "$guard_stored_sid" != "$hook_session_id" ]]; then + jq -n \ + '{systemMessage: "RLCR loop in this repo is parked by another Claude session waiting for background work. Stop allowed; your session leaves the loop untouched. If that session ended, run /humanize:cancel-rlcr-loop to clean up."}' + exit 0 + fi + fi + fi + + # ---------------------------------------- + # Early Exit: Pending Background Tasks + # ---------------------------------------- + # When the main Claude Code session has dispatched background work + # (Agent with run_in_background=true, or Bash with + # run_in_background=true) whose completion notifications have not + # yet arrived, the natural "stop" is simply "I am waiting for the + # background task". Running git/summary/BitLesson/Codex gates in + # that state wastes Codex tokens and produces low-signal reviews. + # + # Allow the stop (exit 0) and emit a user-visible systemMessage so + # nobody mistakes the pause for loop completion. 
The on-disk loop + # state is left untouched -- the next natural stop (after + # background work finishes) will re-enter this hook with no + # pending tasks and run the normal flow. + # + # loop_start_ts confines the transcript scan to launches that + # actually happened during this loop; earlier session-wide bg + # activity cannot pin the loop. + # + # This check MUST run before any other gate (phase detection, + # state parsing, branch / plan / git-clean / summary / max-iter + # checks, Codex review). + local pending_bg_ids + pending_bg_ids=$(list_pending_background_task_ids "$transcript_path" "$loop_start_ts" 2>/dev/null) || true + if [[ -n "$pending_bg_ids" ]]; then + local pending_bg_count + pending_bg_count=$(printf '%s\n' "$pending_bg_ids" | sed '/^$/d' | wc -l | tr -d ' ') + # Mark the loop as parked; allows the same session to resume + # later and makes the cross-session guard above reachable if + # the user opens a different Claude session in this repo + # before the bg task completes. + : > "$loop_dir/bg-pending.marker" 2>/dev/null || true + jq -n --arg count "$pending_bg_count" \ + '{systemMessage: ("RLCR loop active. " + $count + " background task(s) still running - stop allowed naturally; loop has NOT terminated and will resume on completion.")}' + exit 0 + fi + + # ---------------------------------------- + # Same-Session Stale-Marker Cleanup + # ---------------------------------------- + # The cross-session guard above already exited for every foreign + # session, so reaching here with the marker present means the + # CURRENT session parked the loop and has now come back with a + # transcript showing no pending bg events. Remove the stale marker + # before the normal flow takes over. + # + # Two-part guard to make sure we never drop the parked-state + # signal without evidence: + # (a) list_pending_background_task_ids returned exit 0 -- the + # transcript was present, readable, AND parsed successfully. 
+ # The helper is fail-closed on missing files, empty paths, + # jq parse failure, and truncation, so a non-zero exit + # blocks cleanup here even when the transcript "file" + # exists. + # (b) its output is empty -- proves "no pending" was + # authoritatively verified, not inferred from a failure. + # The check uses a single fresh call so we capture both the exit + # code and the emptiness without double-running jq. + if [[ -f "$loop_dir/bg-pending.marker" ]]; then + local pending_bg_check + if pending_bg_check=$(list_pending_background_task_ids "$transcript_path" "$loop_start_ts" 2>/dev/null) \ + && [[ -z "$pending_bg_check" ]]; then + rm -f "$loop_dir/bg-pending.marker" 2>/dev/null || true + fi + fi +} diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 7e18df9b..374a5d30 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -242,209 +242,11 @@ extract_session_id() { printf '%s' "$input" | jq -r '.session_id // empty' 2>/dev/null || echo "" } -# Expand a leading "~" or "~/" in a path to "$HOME" without using eval. -# Only the bare "~" and "~/..." forms are expanded; "~user/..." and every -# other input (absolute path, relative path, empty string) is returned verbatim. -# -# Usage: expand_leading_tilde "$path" -# Prints the normalized path to stdout. -expand_leading_tilde() { - local path="$1" - case "$path" in - '~') printf '%s' "${HOME:-}" ;; - '~/'*) printf '%s/%s' "${HOME:-}" "${path#'~/'}" ;; - *) printf '%s' "$path" ;; - esac -} - -# Extract transcript_path from hook JSON input and expand any leading tilde. -# Usage: extract_transcript_path "$json_input" -# Outputs the transcript_path to stdout, or empty string if not available. 
-extract_transcript_path() { - local input="$1" - local raw - raw=$(printf '%s' "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") - expand_leading_tilde "$raw" -} - -# Convert an RLCR loop dir basename to a lexically-comparable ISO-8601 -# UTC timestamp suitable for filtering transcript events. -# -# `setup-rlcr-loop.sh` creates loop dirs named `YYYY-MM-DD_HH-MM-SS` in -# the system's LOCAL wall clock (it calls `date +%Y-%m-%d_%H-%M-%S` -# without `-u`). Claude transcript events carry actual UTC timestamps -# like `2026-04-16T13:19:26.819Z`. To compare them correctly, this -# helper converts the local wall-clock parse back to a real UTC moment -# via a two-step: parse local -> epoch seconds -> format in UTC. -# -# The `.000Z` suffix keeps sub-second transcript timestamps in the same -# second compared greater via lexical string ordering. -# -# Usage: derive_loop_start_iso_ts "$loop_dir" -# Prints the ISO-8601 UTC timestamp, or empty string when the -# basename does not match the expected format or the local `date` -# binary cannot parse it. -derive_loop_start_iso_ts() { - local loop_dir="$1" - local base - base=$(basename "$loop_dir" 2>/dev/null || echo "") - if [[ ! "$base" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})-([0-9]{2})$ ]]; then - return - fi - local local_datetime - local_datetime="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}:${BASH_REMATCH[3]}:${BASH_REMATCH[4]}" - - # Local wall-clock -> epoch seconds. GNU `date -d` first, - # BSD/macOS `date -j -f ...` second. Both honour the caller's TZ - # for interpretation, matching setup-rlcr-loop.sh's behaviour at - # loop-dir creation time. - local epoch - epoch=$(date -d "$local_datetime" +%s 2>/dev/null) || epoch="" - if [[ -z "$epoch" ]]; then - epoch=$(date -j -f "%Y-%m-%d %H:%M:%S" "$local_datetime" +%s 2>/dev/null) || epoch="" - fi - if [[ -z "$epoch" ]]; then - return - fi - - # Epoch -> UTC ISO-8601. Try GNU then BSD. 
- local utc_iso - utc_iso=$(date -u -d "@$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" - if [[ -z "$utc_iso" ]]; then - utc_iso=$(date -u -r "$epoch" "+%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null) || utc_iso="" - fi - printf '%s' "$utc_iso" -} - -# Enumerate background-task ids that have been launched but not yet marked -# completed in a Claude Code transcript.jsonl. -# -# Launch events (inspected in tool_result "user" messages): -# - Background subagent: toolUseResult.isAsync == true -# -> id is toolUseResult.agentId -# - Background shell: toolUseResult.backgroundTaskId non-empty -# -> id is toolUseResult.backgroundTaskId -# -# Completion events are recognised from two Claude Code transcript forms: -# -# 1. Structured SDK record -# (see SDKTaskNotificationMessage in docs/typescript.md): -# `type == "system"`, `subtype == "task_notification"`, -# `task_id` is the completed id. Any `status` value -# (completed, failed, stopped, ...) is treated as terminal. -# -# 2. Legacy queue-operation enqueue whose `content` embeds a -# `<task-notification>` XML block with `<task-id>...</task-id>`; -# kept for transcripts produced by older Claude Code versions. -# -# pending := launched \ completed -# -# Optional second argument `since_ts` (ISO-8601 string, e.g. the value -# returned by `derive_loop_start_iso_ts`): when provided, only launch -# events whose top-level `.timestamp` field is >= `since_ts` count as -# candidate launches. Events without a `.timestamp` are included (keeps -# fixture transcripts and older record formats working). This keeps -# pre-loop session-wide background work from pinning an RLCR loop that -# has no pending work of its own. -# -# Usage: list_pending_background_task_ids "$transcript_path" [since_ts] -# - Outputs one id per line on stdout (possibly empty). -# - Returns 0 when the transcript is readable (including when there are -# no pending tasks). 
Returns 1 when the transcript path is empty, not -# a regular file, or jq is unavailable, so callers must treat non-zero -# as "unknown -> do not short-circuit". -list_pending_background_task_ids() { - local transcript_path="$1" - local since_ts="${2:-}" - - # Normalize a leading tilde so direct callers (tests, ad-hoc scripts) - # work correctly even when transcript_path was not routed through - # extract_transcript_path. - transcript_path=$(expand_leading_tilde "$transcript_path") - - if [[ -z "$transcript_path" ]] || [[ ! -f "$transcript_path" ]]; then - return 1 - fi - if ! command -v jq >/dev/null 2>&1; then - return 1 - fi - - local launched completed - launched=$(jq -r --arg since_ts "$since_ts" ' - select(.toolUseResult != null) - | select( - ($since_ts == "" - or ((.timestamp // "") == "") - or ((.timestamp // "") >= $since_ts)) - ) - | select( - (.toolUseResult.isAsync == true and (.toolUseResult.agentId // "") != "") - or ((.toolUseResult.backgroundTaskId // "") != "") - ) - | (.toolUseResult.agentId // .toolUseResult.backgroundTaskId) - ' "$transcript_path" 2>/dev/null | sort -u) || return 1 - - # Union of both completion formats. Either source alone is enough to - # mark a launched id terminal. - # - # The `grep -oE || true` guard on the legacy branch keeps `set -o - # pipefail` from poisoning the combined pipeline when no legacy - # queue-operation records exist in the transcript (grep with `-o` - # exits 1 on no matches, which would otherwise wipe out any SDK - # task_notification results collected above). 
- completed=$( - { - jq -r ' - select(.type == "system" and .subtype == "task_notification") - | (.task_id // empty) - ' "$transcript_path" 2>/dev/null - jq -r ' - select(.type == "queue-operation" and .operation == "enqueue") - | (.content // "" | tostring) - | select(contains("")) - ' "$transcript_path" 2>/dev/null \ - | { grep -oE '[^<]+' || true; } \ - | sed -E 's|||g' - } | sort -u | sed '/^$/d' - ) || completed="" - - # Emit launched ids that have no matching completion notification. - comm -23 \ - <(printf '%s\n' "$launched" | sed '/^$/d') \ - <(printf '%s\n' "$completed" | sed '/^$/d') -} - -# Returns 0 when the transcript shows at least one pending background task. -# Returns 1 when no pending tasks are detected (including fail-closed cases -# like missing transcript, non-file path, or jq unavailable). -# -# Usage: has_pending_background_tasks "$transcript_path" [since_ts] -has_pending_background_tasks() { - local transcript_path="$1" - local since_ts="${2:-}" - local pending - pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || return 1 - [[ -n "$pending" ]] -} - -# Prints the count of pending background tasks to stdout. Prints 0 for any -# error case so callers can still format messages safely. 
-# -# Usage: count_pending_background_tasks "$transcript_path" [since_ts] -count_pending_background_tasks() { - local transcript_path="$1" - local since_ts="${2:-}" - local pending - pending=$(list_pending_background_task_ids "$transcript_path" "$since_ts" 2>/dev/null) || { - echo 0 - return 0 - } - if [[ -z "$pending" ]]; then - echo 0 - else - printf '%s\n' "$pending" | sed '/^$/d' | wc -l | tr -d ' ' - fi -} +# Background-task helpers (expand_leading_tilde, extract_transcript_path, +# derive_loop_start_iso_ts, list/has/count_pending_background_task[_ids], +# handle_bg_task_short_circuit) live in loop-bg-tasks.sh and are sourced +# at the bottom of this file so every existing consumer of loop-common.sh +# continues to get them transparently. # Resolve the active state file for a loop directory # Checks for finalize-state.md first, then state.md @@ -1712,3 +1514,15 @@ end_loop() { return 1 fi } + +# Source background-task helpers. Sourced at the bottom so every function +# above is available to callers that only need loop-common.sh, while bg-aware +# callers (the stop hook, the test suite) still get the bg helpers via a +# single source of loop-common.sh. +# +# _LOOP_COMMON_DIR is set here instead of at the top of the file because +# loop-bg-tasks.sh lives in the same directory as this file and we want to +# locate it regardless of how loop-common.sh was sourced. +_LOOP_COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +# shellcheck source=loop-bg-tasks.sh +source "$_LOOP_COMMON_DIR/loop-bg-tasks.sh" diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 6779f94e..f3a821d5 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -68,110 +68,19 @@ if [[ -z "$LOOP_DIR" ]]; then exit 0 fi -# Shared state used by both guard blocks and the pending-tasks check below. -# Loop-start boundary: derived from the loop dir basename (`YYYY-MM-DD_HH-MM-SS`). 
-# Empty means derivation failed; helpers treat empty since_ts as no boundary. -LOOP_START_TS=$(derive_loop_start_iso_ts "$LOOP_DIR") -HOOK_TRANSCRIPT_PATH=$(extract_transcript_path "$HOOK_INPUT") - -# ======================================== -# Ambiguous-Caller Marker Guard -# ======================================== -# If a bg-pending.marker is present but we have no session_id on this -# hook invocation (typical of scripts/rlcr-stop-gate.sh invoked without -# --session-id, or any other caller that doesn't forward session_id), -# we cannot tell whether this caller owns the parked loop. Taking either -# branch (foreign-session guard below, or same-session cleanup further -# down) would be wrong in one of the two possible realities. Exit 0 -# silently: the real Claude hook will arrive with session_id populated -# and drive parking / cleanup from an authoritative context. -if [[ -f "$LOOP_DIR/bg-pending.marker" ]] && [[ -z "$HOOK_SESSION_ID" ]]; then - exit 0 -fi - -# ======================================== -# Cross-Session Parked-Loop Guard # ======================================== -# If find_active_loop handed this dir over via the marker fallback, the -# loop is parked by a different session waiting on a background task. The -# current session has no authority to inspect or advance that loop - its -# transcript sees none of the foreign bg activity - so the only safe -# response is to exit 0 with a distinct systemMessage and leave every -# on-disk artifact (state file, stored session_id, marker) untouched. -# -# Both sides of the session-id comparison must be non-empty for this -# branch to trigger: an empty HOOK_SESSION_ID has already exited above -# via the ambiguous-caller guard, and an empty stored session_id keeps -# the backward-compat "matches any" semantics from find_active_loop. 
-if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then - GUARD_STATE_FILE=$(resolve_active_state_file "$LOOP_DIR") - if [[ -n "$GUARD_STATE_FILE" ]]; then - GUARD_STORED_SID=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$GUARD_STATE_FILE" 2>/dev/null | tr -d ' ') - if [[ -n "$GUARD_STORED_SID" ]] \ - && [[ -n "$HOOK_SESSION_ID" ]] \ - && [[ "$GUARD_STORED_SID" != "$HOOK_SESSION_ID" ]]; then - jq -n \ - '{systemMessage: "RLCR loop in this repo is parked by another Claude session waiting for background work. Stop allowed; your session leaves the loop untouched. If that session ended, run /humanize:cancel-rlcr-loop to clean up."}' - exit 0 - fi - fi -fi - -# ======================================== -# Early Exit: Pending Background Tasks -# ======================================== -# When the main Claude Code session has dispatched background work (Agent with -# run_in_background=true, or Bash with run_in_background=true) whose -# completion notifications have not yet arrived, the natural "stop" is simply -# "I am waiting for the background task". Running git/summary/BitLesson/Codex -# gates in that state wastes Codex tokens and produces low-signal reviews. -# -# Allow the stop (exit 0) and emit a user-visible systemMessage so nobody -# mistakes the pause for loop completion. The on-disk loop state is left -# untouched -- the next natural stop (after background work finishes) will -# re-enter this hook with no pending tasks and run the normal flow. -# -# LOOP_START_TS confines the transcript scan to launches that actually -# happened during this loop; earlier session-wide bg activity cannot pin -# the loop. -# -# This check MUST run before any other gate (phase detection, state parsing, -# branch / plan / git-clean / summary / max-iter checks, Codex review). 
-PENDING_BG_IDS=$(list_pending_background_task_ids "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS" 2>/dev/null) || true -if [[ -n "$PENDING_BG_IDS" ]]; then - PENDING_BG_COUNT=$(printf '%s\n' "$PENDING_BG_IDS" | sed '/^$/d' | wc -l | tr -d ' ') - # Mark the loop as parked; allows the same session to resume later and - # makes the cross-session guard above reachable if the user opens a - # different Claude session in this repo before the bg task completes. - : > "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true - jq -n --arg count "$PENDING_BG_COUNT" \ - '{systemMessage: ("RLCR loop active. " + $count + " background task(s) still running - stop allowed naturally; loop has NOT terminated and will resume on completion.")}' - exit 0 -fi - -# Same-session resume after background task finished: the cross-session -# guard above already exited for every foreign session, so reaching here -# with the marker present means the CURRENT session parked the loop and -# has now come back with a transcript showing no pending bg events. -# Remove the stale marker before the normal flow takes over. -# -# Two-part guard to make sure we never drop the parked-state signal -# without evidence: -# (a) list_pending_background_task_ids returned exit 0 -- the -# transcript was present, readable, AND parsed successfully. -# The helper is fail-closed on missing files, empty paths, -# jq parse failure, and truncation, so a non-zero exit blocks -# cleanup here even when the transcript "file" exists. -# (b) its output is empty -- proves "no pending" was authoritatively -# verified, not inferred from a failure. -# The check uses a single fresh call so we capture both the exit code -# and the emptiness without double-running jq. 
-if [[ -f "$LOOP_DIR/bg-pending.marker" ]]; then - if PENDING_BG_CHECK=$(list_pending_background_task_ids "$HOOK_TRANSCRIPT_PATH" "$LOOP_START_TS" 2>/dev/null) \ - && [[ -z "$PENDING_BG_CHECK" ]]; then - rm -f "$LOOP_DIR/bg-pending.marker" 2>/dev/null || true - fi -fi +# Background-Task Guards +# ======================================== +# Delegates to handle_bg_task_short_circuit (hooks/lib/loop-bg-tasks.sh), +# which runs four cohesive guards in order: +# 1. Ambiguous-caller marker guard (no session_id + marker present) +# 2. Cross-session parked-loop guard (foreign session walking in) +# 3. Pending-bg short-circuit (this session has async work in flight) +# 4. Same-session stale-marker cleanup (bg work just finished) +# When any guard short-circuits, it emits the appropriate JSON on stdout +# and `exit 0`s directly; we never return from that call. When no guard +# fires we continue into the normal gate logic below. +handle_bg_task_short_circuit "$LOOP_DIR" "$HOOK_INPUT" "$HOOK_SESSION_ID" # ======================================== # Detect Loop Phase: Normal or Finalize diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 32f2c3ac..612c68e2 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -203,5 +203,80 @@ else fail "rlcr-stop-gate reports ALLOW when no active loop" "output containing ALLOW:" "$OUTPUT5" fi +# Test 6: Empty session_id must NOT drop transcript_path from the hook +# input JSON (regression: a `select(length > 0)` used as a plain object +# value would collapse the whole enclosing object to empty whenever any +# selected field was empty, wiping forwarded fields like transcript_path +# even though only session_id was missing). The fix replaces the plain +# select with explicit if/then/else so each field independently becomes +# null on empty input. 
+T6_DIR="$TEST_DIR/t6" +mkdir -p "$T6_DIR/bin" + +# Mock hook that echoes the raw stdin it received, so we can inspect the +# JSON rlcr-stop-gate.sh builds without depending on the real hook's +# pending-bg logic. +cat > "$T6_DIR/bin/loop-codex-stop-hook.sh" <<'MOCK_HOOK_EOF' +#!/usr/bin/env bash +set -euo pipefail +INPUT="$(cat)" +# Emit a JSON block so the gate wrapper walks the non-"allow on empty" +# branch. We set decision:"block" AND include a recognizable reason the +# test can grep for. +printf '%s\n' "$INPUT" > "${MOCK_HOOK_INPUT_LOG:-/dev/null}" +printf '%s\n' '{"decision":"block","reason":"mock-hook","systemMessage":"mock"}' +MOCK_HOOK_EOF +chmod +x "$T6_DIR/bin/loop-codex-stop-hook.sh" + +# Layout expected by rlcr-stop-gate.sh: HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh. +# We stage a fake plugin root pointing at the mock hook and copy the gate +# wrapper next to it so the relative resolution resolves to the mock. +mkdir -p "$T6_DIR/plugin/scripts" "$T6_DIR/plugin/hooks" +cp "$T6_DIR/bin/loop-codex-stop-hook.sh" "$T6_DIR/plugin/hooks/loop-codex-stop-hook.sh" +cp "$GATE_SCRIPT" "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" +chmod +x "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" + +T6_INPUT_LOG="$T6_DIR/hook-input.json" +T6_TRANSCRIPT="$T6_DIR/fake-transcript.jsonl" +: > "$T6_TRANSCRIPT" + +set +e +( + cd "$T6_DIR" + MOCK_HOOK_INPUT_LOG="$T6_INPUT_LOG" \ + "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" \ + --transcript-path "$T6_TRANSCRIPT" \ + --json +) > "$T6_DIR/out.txt" 2>&1 +EXIT6=$? +set -e + +if [[ ! -f "$T6_INPUT_LOG" ]]; then + fail "rlcr-stop-gate forwards transcript_path when session_id is empty" \ + "mock hook to capture hook input JSON" \ + "captured input log missing; gate output: $(cat "$T6_DIR/out.txt" 2>/dev/null || true)" +else + T6_TRANSCRIPT_SEEN=$(jq -r '.transcript_path // "__MISSING__"' "$T6_INPUT_LOG" 2>/dev/null || echo "__PARSE_ERROR__") + T6_SESSION_SEEN=$(jq -r '.session_id | if . == null then "__NULL__" else . 
end' "$T6_INPUT_LOG" 2>/dev/null || echo "__PARSE_ERROR__") + if [[ "$T6_TRANSCRIPT_SEEN" == "$T6_TRANSCRIPT" ]] && [[ "$T6_SESSION_SEEN" == "__NULL__" ]]; then + pass "rlcr-stop-gate forwards transcript_path when session_id is empty (jq object-collapse fix)" + else + fail "rlcr-stop-gate forwards transcript_path when session_id is empty (jq object-collapse fix)" \ + "transcript_path=$T6_TRANSCRIPT, session_id=__NULL__" \ + "transcript_path=$T6_TRANSCRIPT_SEEN, session_id=$T6_SESSION_SEEN; raw: $(cat "$T6_INPUT_LOG" 2>/dev/null || true)" + fi +fi + +# Exit 10 because the mock hook always returns decision:"block"; ensure +# the wrapper reached the decision branch rather than exiting 20 +# (wrapper error) or 0 (bogus ALLOW from lost transcript_path). +if [[ "$EXIT6" -eq 10 ]]; then + pass "rlcr-stop-gate reaches decision branch with empty session_id + real transcript_path" +else + T6_BODY=$(cat "$T6_DIR/out.txt" 2>/dev/null || true) + fail "rlcr-stop-gate reaches decision branch with empty session_id + real transcript_path" \ + "exit 10 (mock hook returns block)" "exit $EXIT6; output: $T6_BODY" +fi + print_test_summary "RLCR Stop Gate Wrapper Test Summary" exit $? From fdd8670575a55540f4544b926abd7db07794074f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 17:49:38 -0700 Subject: [PATCH 69/97] Add liveness probe to prune orphaned background task IDs Tasks killed externally (OOM, manual cancel, parent exit) never emit a completion event in the transcript, leaving their launch IDs in the pending set forever and blocking the stop hook indefinitely. Adds an lsof-based liveness probe in list_pending_background_task_ids: after the transcript diff, each remaining pending ID is checked against its output file under /tmp/claude-<uid>/<slug>/<sid>/tasks/. If the file exists but lsof reports zero holders the task is confirmed dead and dropped. Fail-open when the file is absent or lsof is unavailable.
New helpers: derive_tasks_dir_from_transcript, is_bg_task_alive, prune_dead_bg_task_ids. LSOF_BIN env override allows test stubs. Two new test cases cover the positive path (lsof alive -> short-circuit still fires) and the negative path (lsof dead -> orphan pruned, Codex review runs). --- hooks/lib/loop-bg-tasks.sh | 78 ++++++++++++++++++++++++++++++-- tests/test-stop-hook-bg-allow.sh | 75 +++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 4 deletions(-) diff --git a/hooks/lib/loop-bg-tasks.sh b/hooks/lib/loop-bg-tasks.sh index 116e9e36..08eba146 100755 --- a/hooks/lib/loop-bg-tasks.sh +++ b/hooks/lib/loop-bg-tasks.sh @@ -94,6 +94,65 @@ derive_loop_start_iso_ts() { printf '%s' "$utc_iso" } +# Derive the Claude Code task-output directory from a transcript path. +# +# Claude Code writes background-task output files under: +# /tmp/claude-<uid>/<slug>/<sid>/tasks/<task_id>.output +# +# The project slug and session id are encoded in the transcript path: +# <root>/projects/<slug>/<sid>.jsonl +# +# Usage: derive_tasks_dir_from_transcript "$transcript_path" +# Prints the tasks dir path, or nothing when derivation fails. +derive_tasks_dir_from_transcript() { + local transcript_path="$1" + [[ -z "$transcript_path" ]] && return + local slug sid uid + slug=$(basename "$(dirname "$transcript_path")" 2>/dev/null) + sid=$(basename "$transcript_path" .jsonl 2>/dev/null) + uid=$(id -u 2>/dev/null) || return + if [[ -z "$slug" ]] || [[ "$slug" == "." ]] || [[ -z "$sid" ]] || [[ -z "$uid" ]]; then + return + fi + printf '/tmp/claude-%s/%s/%s/tasks' "$uid" "$slug" "$sid" +} + +# Returns 0 if the background task identified by task_id appears to be alive +# (output file absent, or lsof reports >= 1 holder), 1 if confirmed dead +# (output file exists and lsof reports 0 holders). +# +# Fail-open: returns 0 (alive) when the output file does not exist or when +# the lsof binary is unavailable. Otherwise lsof's exit status is returned +# as-is, so any non-zero lsof exit (including lsof errors) reads as dead.
+# +# Set LSOF_BIN to override the lsof binary path (used in tests). +# +# Usage: is_bg_task_alive "$task_id" "$tasks_dir" +is_bg_task_alive() { + local task_id="$1" tasks_dir="$2" + local lsof_bin="${LSOF_BIN:-lsof}" + local output_file="$tasks_dir/$task_id.output" + # Output file absent -> fail open (treat as still running). + [[ -f "$output_file" ]] || return 0 + # lsof unavailable -> fail open. + command -v "$lsof_bin" >/dev/null 2>&1 || return 0 + # lsof exits 0 when >= 1 process has the file open, 1 otherwise. + "$lsof_bin" "$output_file" >/dev/null 2>&1 +} + +# Filter a newline-delimited list of task IDs, retaining only those that +# pass is_bg_task_alive. Prints surviving IDs one per line. +# +# Usage: prune_dead_bg_task_ids "$pending_ids" "$tasks_dir" +prune_dead_bg_task_ids() { + local pending_ids="$1" tasks_dir="$2" + local task_id + while IFS= read -r task_id; do + [[ -z "$task_id" ]] && continue + is_bg_task_alive "$task_id" "$tasks_dir" && printf '%s\n' "$task_id" + done <<< "$pending_ids" +} + # Enumerate background-task ids that have been launched but not yet marked # completed in a Claude Code transcript.jsonl. # @@ -186,10 +245,23 @@ list_pending_background_task_ids() { } | sort -u | sed '/^$/d' ) || completed="" - # Emit launched ids that have no matching completion notification. - comm -23 \ + # Collect launched ids that have no matching completion notification. + local pending + pending=$(comm -23 \ <(printf '%s\n' "$launched" | sed '/^$/d') \ - <(printf '%s\n' "$completed" | sed '/^$/d') + <(printf '%s\n' "$completed" | sed '/^$/d')) + + # Apply liveness probe: drop orphaned task IDs whose output file exists + # but has zero open file descriptors (killed without a completion event). 
+ if [[ -n "$pending" ]]; then + local tasks_dir + tasks_dir=$(derive_tasks_dir_from_transcript "$transcript_path") + if [[ -n "$tasks_dir" ]]; then + pending=$(prune_dead_bg_task_ids "$pending" "$tasks_dir") + fi + fi + + printf '%s\n' "$pending" | sed '/^$/d' } # Returns 0 when the transcript shows at least one pending background task. diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index ab9fe571..2f034320 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -56,6 +56,26 @@ mkdir -p "$XDG_CACHE_HOME" FAKE_HOME="$TEST_DIR/fake-home" mkdir -p "$FAKE_HOME" +# ---------------------------------------------------------------------- +# Mock lsof binaries used by the liveness-probe tests (AC-23, AC-24). +# lsof-alive exits 0 (simulates >= 1 holder: task is running). +# lsof-dead exits 1 (simulates 0 holders: task is orphaned/dead). +# ---------------------------------------------------------------------- +setup_mock_lsof() { + mkdir -p "$TEST_DIR/bin" + cat > "$TEST_DIR/bin/lsof-alive" << 'EOF' +#!/usr/bin/env bash +exit 0 +EOF + chmod +x "$TEST_DIR/bin/lsof-alive" + + cat > "$TEST_DIR/bin/lsof-dead" << 'EOF' +#!/usr/bin/env bash +exit 1 +EOF + chmod +x "$TEST_DIR/bin/lsof-dead" +} + # ---------------------------------------------------------------------- # Mock codex CLI: records an invocation marker and prints canned feedback. # ---------------------------------------------------------------------- @@ -249,7 +269,7 @@ write_transcript() { # Sets RUN_EXIT_CODE, RUN_OUTPUT, RUN_MARKER. 
# ---------------------------------------------------------------------- run_stop_hook_with_input() { - local repo_dir="$1" hook_input_json="$2" home_override="${3:-}" + local repo_dir="$1" hook_input_json="$2" home_override="${3:-}" lsof_bin_override="${4:-}" RUN_MARKER="$repo_dir/codex-called.marker" rm -f "$RUN_MARKER" @@ -258,6 +278,7 @@ run_stop_hook_with_input() { RUN_OUTPUT=$( cd "$repo_dir" [[ -n "$home_override" ]] && export HOME="$home_override" + [[ -n "$lsof_bin_override" ]] && export LSOF_BIN="$lsof_bin_override" CLAUDE_PROJECT_DIR="$repo_dir" \ MOCK_CODEX_MARKER="$RUN_MARKER" \ MOCK_CODEX_OUTPUT="Mock review feedback" \ @@ -318,6 +339,7 @@ assert_reached_codex() { } setup_mock_codex +setup_mock_lsof # Transcripts live outside any test repo to avoid tripping git cleanliness # gates in the stop hook. @@ -1385,5 +1407,56 @@ else "exit $AC22B_EXIT; marker=$(test -f "$AC22B_MARKER" && echo present || echo missing); state_unchanged=$([[ "$AC22B_STATE_HASH_BEFORE" == "$AC22B_STATE_HASH_AFTER" ]] && echo yes || echo no); output: $AC22B_BODY" fi +# ---------------- AC-23 ---------------- +# Liveness probe positive: a pending task whose output file is open by at +# least one process (lsof exits 0) must still be treated as running. +# The short-circuit must fire and emit a systemMessage. 
+echo "Test AC-23: liveness probe - alive task (lsof has holder) -> still short-circuits" +AC23_REPO="$TEST_DIR/ac23" +AC23_LOOP=$(create_full_fixture "$AC23_REPO") +AC23_STATE="$AC23_LOOP/state.md" +AC23_TRANSCRIPT="$TRANSCRIPTS_DIR/ac23.jsonl" +AC23_TASK_ID="agent_probe_alive" +AC23_LAUNCH=$(emit_tool_use_assistant "toolu_AC23" "Agent" ',"description":"x","prompt":"x"') +AC23_RESULT=$(emit_async_agent_launch_result "toolu_AC23" "$AC23_TASK_ID") +write_transcript "$AC23_TRANSCRIPT" "$AC23_LAUNCH" "$AC23_RESULT" + +AC23_UID=$(id -u) +AC23_SLUG=$(basename "$TRANSCRIPTS_DIR") +AC23_TASKS_DIR="/tmp/claude-${AC23_UID}/${AC23_SLUG}/ac23/tasks" +mkdir -p "$AC23_TASKS_DIR" +touch "$AC23_TASKS_DIR/${AC23_TASK_ID}.output" + +AC23_INPUT=$(jq -c -n --arg tp "$AC23_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC23_REPO" "$AC23_INPUT" "" "$TEST_DIR/bin/lsof-alive" +rm -rf "/tmp/claude-${AC23_UID}/${AC23_SLUG}/ac23" 2>/dev/null || true +assert_systemmessage_only \ + "AC-23: alive task (lsof has holder) still triggers short-circuit" \ + "$AC23_REPO" "$AC23_STATE" "1 background task" + +# ---------------- AC-24 ---------------- +# Liveness probe negative: a pending task whose output file has no open +# file descriptors (lsof exits 1) was killed without a completion event. +# The probe must drop it so the hook proceeds to normal Codex review. 
+echo "Test AC-24: liveness probe - dead/orphaned task (lsof no holder) -> reaches Codex" +AC24_REPO="$TEST_DIR/ac24" +create_full_fixture "$AC24_REPO" > /dev/null +AC24_TRANSCRIPT="$TRANSCRIPTS_DIR/ac24.jsonl" +AC24_TASK_ID="agent_probe_dead" +AC24_LAUNCH=$(emit_tool_use_assistant "toolu_AC24" "Agent" ',"description":"x","prompt":"x"') +AC24_RESULT=$(emit_async_agent_launch_result "toolu_AC24" "$AC24_TASK_ID") +write_transcript "$AC24_TRANSCRIPT" "$AC24_LAUNCH" "$AC24_RESULT" + +AC24_UID=$(id -u) +AC24_SLUG=$(basename "$TRANSCRIPTS_DIR") +AC24_TASKS_DIR="/tmp/claude-${AC24_UID}/${AC24_SLUG}/ac24/tasks" +mkdir -p "$AC24_TASKS_DIR" +touch "$AC24_TASKS_DIR/${AC24_TASK_ID}.output" + +AC24_INPUT=$(jq -c -n --arg tp "$AC24_TRANSCRIPT" '{transcript_path:$tp}') +run_stop_hook_with_input "$AC24_REPO" "$AC24_INPUT" "" "$TEST_DIR/bin/lsof-dead" +rm -rf "/tmp/claude-${AC24_UID}/${AC24_SLUG}/ac24" 2>/dev/null || true +assert_reached_codex "AC-24: dead/orphaned task (lsof no holder) is pruned; Codex review runs" + print_test_summary "Stop Hook Background-Task Allow Test Summary" exit $? 
From 850a4443cc0e56837e94f6a0c9810edf92a80c31 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 20:31:47 -0700 Subject: [PATCH 70/97] WIP: wrap all path with realpath --- hooks/lib/loop-common.sh | 6 +- hooks/lib/project-root.sh | 96 +++++++++++++++++++++++++++++++ hooks/loop-bash-validator.sh | 2 +- hooks/loop-codex-stop-hook.sh | 6 +- hooks/loop-edit-validator.sh | 8 ++- hooks/loop-plan-file-validator.sh | 3 +- hooks/loop-post-bash-hook.sh | 9 ++- hooks/loop-read-validator.sh | 11 +++- hooks/loop-write-validator.sh | 17 ++++-- scripts/ask-codex.sh | 10 ++-- scripts/ask-gemini.sh | 13 +++-- scripts/bitlesson-select.sh | 7 ++- scripts/cancel-rlcr-loop.sh | 12 ++-- scripts/rlcr-stop-gate.sh | 15 ++++- scripts/setup-rlcr-loop.sh | 6 +- tests/test-stop-gate.sh | 10 +++- 16 files changed, 195 insertions(+), 36 deletions(-) create mode 100644 hooks/lib/project-root.sh diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 374a5d30..263501cb 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -173,6 +173,10 @@ LOOP_COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" LOOP_COMMON_PLUGIN_ROOT="$(cd "$LOOP_COMMON_DIR/../.." && pwd)" export PLUGIN_ROOT="${PLUGIN_ROOT:-$LOOP_COMMON_PLUGIN_ROOT}" +# Shared project-root resolver (CLAUDE_PROJECT_DIR -> git toplevel, +# realpath-canonicalized). Must load before any caller needs PROJECT_ROOT. 
+source "$LOOP_COMMON_DIR/project-root.sh" + _lc_errexit=false; [[ -o errexit ]] && _lc_errexit=true _lc_nounset=false; [[ -o nounset ]] && _lc_nounset=true _lc_pipefail=false; [[ -o pipefail ]] && _lc_pipefail=true @@ -182,7 +186,7 @@ $_lc_nounset && set -u || set +u $_lc_pipefail && set -o pipefail || set +o pipefail unset _lc_errexit _lc_nounset _lc_pipefail -_LOOP_COMMON_PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null || pwd)}" +_LOOP_COMMON_PROJECT_ROOT="$(resolve_project_root 2>/dev/null || true)" # Config loading is best-effort: use || true so a config-load failure does not # abort sourcing before callers' dependency checks (jq, codex) are reached. # Stderr is NOT suppressed so malformed config warnings remain visible. diff --git a/hooks/lib/project-root.sh b/hooks/lib/project-root.sh new file mode 100644 index 00000000..3887788b --- /dev/null +++ b/hooks/lib/project-root.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +# +# Deterministic project-root resolver for all humanize hooks and scripts. +# +# Resolution priority: +# 1. CLAUDE_PROJECT_DIR (set by Claude Code, stable across `cd` within a session) +# 2. git rev-parse --show-toplevel (nearest enclosing repo) +# 3. Non-zero return. +# +# pwd is intentionally NOT used as a fallback: it drifts with `cd` +# invocations during a session and silently causes state.md lookups +# under .humanize/rlcr/ to miss the active loop directory. +# +# The resolved path is passed through realpath so symlinked prefixes +# (e.g. /Users/x vs /private/Users/x on macOS, or /var vs /private/var) +# do not diverge between setup-time and hook-time resolution. +# +# Path-comparison sites in validators must mirror this by canonicalizing +# the user-provided side as well; use the companion `canonicalize_path` +# helper below. 
+# + +if [[ -n "${_HUMANIZE_PROJECT_ROOT_SOURCED:-}" ]]; then + return 0 2>/dev/null || true +fi +_HUMANIZE_PROJECT_ROOT_SOURCED=1 + +# resolve_project_root +# +# Prints the resolved project root to stdout. Returns 0 on success, +# 1 when neither CLAUDE_PROJECT_DIR nor a git toplevel is available. +# +# Callers that must have a project root should handle the failure: +# +# PROJECT_ROOT="$(resolve_project_root)" || exit 0 # hook: allow natural stop +# PROJECT_ROOT="$(resolve_project_root)" || { # setup: hard error +# echo "Error: cannot determine humanize project root" >&2 +# exit 1 +# } +# +resolve_project_root() { + local root="${CLAUDE_PROJECT_DIR:-}" + if [[ -z "$root" ]]; then + root="$(git rev-parse --show-toplevel 2>/dev/null || true)" + fi + if [[ -z "$root" ]]; then + return 1 + fi + + local canonical + canonical=$(canonicalize_path "$root") + printf '%s\n' "${canonical:-$root}" +} + +# canonicalize_path +# +# Prints the realpath of the input path. If the path itself does not +# exist yet (common for write validation before the file is created), +# canonicalizes the parent directory and reattaches the basename. +# If realpath is unavailable and python3 is missing, prints the input +# path verbatim. +# +# Empty input prints nothing and returns 0. +# +canonicalize_path() { + local path="$1" + if [[ -z "$path" ]]; then + return 0 + fi + + local canonical="" + + if canonical=$(realpath "$path" 2>/dev/null) && [[ -n "$canonical" ]]; then + printf '%s\n' "$canonical" + return 0 + fi + + # Path does not exist: canonicalize parent, reattach basename. 
+ local parent base + parent=$(dirname -- "$path") + base=$(basename -- "$path") + if canonical=$(realpath "$parent" 2>/dev/null) && [[ -n "$canonical" ]]; then + printf '%s/%s\n' "${canonical%/}" "$base" + return 0 + fi + + if command -v python3 >/dev/null 2>&1; then + canonical=$(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$path" 2>/dev/null || true) + if [[ -n "$canonical" ]]; then + printf '%s\n' "$canonical" + return 0 + fi + fi + + printf '%s\n' "$path" +} diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 547210fc..3616f3d7 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -49,7 +49,7 @@ COMMAND_LOWER=$(to_lower "$COMMAND") # Find Active Loops (needed for multiple checks) # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 # Extract session_id from hook input for session-aware loop filtering HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index f3a821d5..f96ffd53 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -39,13 +39,13 @@ HOOK_INPUT=$(cat) # Find Active Loop # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" - # Source shared loop functions and template loader SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/lib/loop-common.sh" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 +LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" + # Source portable timeout wrapper for git operations PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" source "$PLUGIN_ROOT/scripts/portable-timeout.sh" diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 17fae65c..32b3bbe9 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -38,7 +38,7 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") # ======================================== if is_round_file_type "$FILE_PATH_LOWER" "todos"; then - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + PROJECT_ROOT="$(resolve_project_root)" || exit 0 LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") if [[ -z "$LOOP_DIR" ]] || ! is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; then @@ -59,7 +59,8 @@ fi # This prevents source code modifications after Codex has signed off. # This check MUST come before the humanize loop dir early exit below. -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. 
@@ -124,7 +125,8 @@ fi # Find Active Loop and Current Round # ======================================== -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" diff --git a/hooks/loop-plan-file-validator.sh b/hooks/loop-plan-file-validator.sh index 4f336abc..a5ce8c9f 100755 --- a/hooks/loop-plan-file-validator.sh +++ b/hooks/loop-plan-file-validator.sh @@ -11,11 +11,12 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" # Source shared loop functions and template loader source "$SCRIPT_DIR/lib/loop-common.sh" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 + # Source portable timeout wrapper for git operations PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" source "$PLUGIN_ROOT/scripts/portable-timeout.sh" diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index a159ebb7..020fa877 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -26,8 +26,13 @@ set -euo pipefail # Read hook JSON input from stdin HOOK_INPUT=$(cat) -# Determine project root -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +# Determine project root using the shared deterministic resolver. +# If neither CLAUDE_PROJECT_DIR nor a git toplevel is available, there +# is no active loop to patch - exit cleanly (pwd is NOT used as a +# fallback because it drifts with `cd` during a session). 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +source "$SCRIPT_DIR/lib/project-root.sh" +PROJECT_ROOT="$(resolve_project_root)" || exit 0 # Check for pending session_id signal file SIGNAL_FILE="$PROJECT_ROOT/.humanize/.pending-session-id" diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 6b2f64f5..5a27fc4f 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -54,7 +54,7 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") # ======================================== if is_round_file_type "$FILE_PATH_LOWER" "todos"; then - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + PROJECT_ROOT="$(resolve_project_root)" || exit 0 LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") if [[ -z "$LOOP_DIR" ]] || ! is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; then @@ -73,7 +73,8 @@ fi # This check MUST come before the summary/prompt early exit below, # otherwise non-summary/prompt files in the loop dir escape restriction. -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. @@ -303,7 +304,11 @@ fi CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" -if [[ "$FILE_PATH" != "$CORRECT_PATH" ]]; then +# Compare canonical (symlink-resolved) forms -- see loop-write-validator.sh +# for the rationale; the same reasoning applies to read paths. 
+_READ_FILE_REAL=$(canonicalize_path "$FILE_PATH") +_READ_CORRECT_REAL=$(canonicalize_path "$CORRECT_PATH") +if [[ "${_READ_FILE_REAL:-$FILE_PATH}" != "${_READ_CORRECT_REAL:-$CORRECT_PATH}" ]]; then FALLBACK="# Wrong Directory Path You tried to {{ACTION}} {{FILE_PATH}} but the correct path is {{CORRECT_PATH}}" diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index a8747e9d..9f46c102 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -55,7 +55,7 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") # ======================================== if is_round_file_type "$FILE_PATH_LOWER" "todos"; then - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + PROJECT_ROOT="$(resolve_project_root)" || exit 0 LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") if [[ -z "$LOOP_DIR" ]] || ! is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; then @@ -76,7 +76,8 @@ fi # This prevents source code modifications after Codex has signed off. # This check MUST come before the file type early exits below. -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. 
@@ -160,7 +161,8 @@ fi # ======================================== # Re-initialize if not set by earlier todos check -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}" +[[ -z "$PROJECT_ROOT" ]] && exit 0 LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" @@ -329,7 +331,14 @@ fi CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" -if [[ "$FILE_PATH" != "$CORRECT_PATH" ]]; then +# Compare canonical (symlink-resolved) forms so the check is not fooled by +# equivalent paths expressed in different prefix forms (e.g. /var/... vs +# /private/var/... on macOS). A raw string compare would mis-handle a +# symlinked project prefix whenever one side was canonicalized upstream +# (e.g. by resolve_project_root) and the other was not. +_WRITE_FILE_REAL=$(canonicalize_path "$FILE_PATH") +_WRITE_CORRECT_REAL=$(canonicalize_path "$CORRECT_PATH") +if [[ "${_WRITE_FILE_REAL:-$FILE_PATH}" != "${_WRITE_CORRECT_REAL:-$CORRECT_PATH}" ]]; then FALLBACK="# Wrong Directory Path You tried to {{ACTION}} {{FILE_PATH}} but the correct path is {{CORRECT_PATH}}" diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index bea40b4e..47ffeab0 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -189,11 +189,11 @@ fi # Detect Project Root # ======================================== -if git rev-parse --show-toplevel &>/dev/null; then - PROJECT_ROOT=$(git rev-parse --show-toplevel) -else - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -fi +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." 
>&2 + exit 1 +} # ======================================== # Create Storage Directories diff --git a/scripts/ask-gemini.sh b/scripts/ask-gemini.sh index 9f59b804..890260a7 100755 --- a/scripts/ask-gemini.sh +++ b/scripts/ask-gemini.sh @@ -29,6 +29,9 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" # Source portable timeout wrapper source "$SCRIPT_DIR/portable-timeout.sh" +# Shared project-root resolver (CLAUDE_PROJECT_DIR -> git toplevel, realpath-canonical) +source "$SCRIPT_DIR/../hooks/lib/project-root.sh" + # ======================================== # Default Configuration # ======================================== @@ -168,11 +171,11 @@ fi # Detect Project Root # ======================================== -if git rev-parse --show-toplevel &>/dev/null; then - PROJECT_ROOT=$(git rev-parse --show-toplevel) -else - PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -fi +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 1 +} # ======================================== # Create Storage Directories diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index d30ef319..fd19a445 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -9,9 +9,14 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/lib/config-loader.sh" source "$SCRIPT_DIR/lib/model-router.sh" +source "$SCRIPT_DIR/../hooks/lib/project-root.sh" PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null || pwd)}" +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." 
>&2 + exit 1 +} MERGED_CONFIG="$(load_merged_config "$PLUGIN_ROOT" "$PROJECT_ROOT")" BITLESSON_MODEL="$(get_config_value "$MERGED_CONFIG" "bitlesson_model")" BITLESSON_MODEL="${BITLESSON_MODEL:-haiku}" diff --git a/scripts/cancel-rlcr-loop.sh b/scripts/cancel-rlcr-loop.sh index 8ec303f0..a835b875 100755 --- a/scripts/cancel-rlcr-loop.sh +++ b/scripts/cancel-rlcr-loop.sh @@ -66,13 +66,17 @@ done # Find Loop Directory # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" - -# Source shared loop library for find_active_loop +# Source shared loop library for find_active_loop and resolve_project_root SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/../hooks/lib/loop-common.sh" +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine humanize project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 3 +} +LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" + # PRODUCT DECISION: Cancel operates globally (no session_id filtering). # # Cancel is invoked as a standalone Bash command via /cancel-rlcr-loop slash command. diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 1e928892..a4cd6329 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -18,7 +18,12 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" HUMANIZE_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" + +# Deterministic project-root resolver (CLAUDE_PROJECT_DIR -> git toplevel, no pwd fallback). +# Overridable via --project-root for non-hook callers; the flag handler below +# always wins because it runs after this default assignment. 
+source "$HUMANIZE_ROOT/hooks/lib/project-root.sh" +PROJECT_ROOT="$(resolve_project_root 2>/dev/null || true)" HOOK_SCRIPT="$HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh" SESSION_ID="${CLAUDE_SESSION_ID:-}" @@ -73,6 +78,14 @@ while [[ $# -gt 0 ]]; do esac done +if [[ -z "$PROJECT_ROOT" ]]; then + # No humanize project context reachable from here -- nothing to enforce. + # Allow the stop to proceed instead of returning a wrapper error so that + # invoking the gate outside any project (or any git repo) is benign. + echo "ALLOW: no humanize project root resolved." + exit 0 +fi + if [[ ! -x "$HOOK_SCRIPT" ]]; then echo "Error: Hook script not found or not executable: $HOOK_SCRIPT" >&2 exit 20 diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 9d45363c..15326bc4 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -323,7 +323,11 @@ done # Validate Prerequisites # ======================================== -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +PROJECT_ROOT="$(resolve_project_root)" || { + echo "Error: Cannot determine humanize project root." >&2 + echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 + exit 1 +} # loop-common.sh already sourced above (provides find_active_loop, etc.) diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 612c68e2..08b037b3 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -231,9 +231,12 @@ chmod +x "$T6_DIR/bin/loop-codex-stop-hook.sh" # Layout expected by rlcr-stop-gate.sh: HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh. # We stage a fake plugin root pointing at the mock hook and copy the gate # wrapper next to it so the relative resolution resolves to the mock. 
-mkdir -p "$T6_DIR/plugin/scripts" "$T6_DIR/plugin/hooks" +mkdir -p "$T6_DIR/plugin/scripts" "$T6_DIR/plugin/hooks/lib" cp "$T6_DIR/bin/loop-codex-stop-hook.sh" "$T6_DIR/plugin/hooks/loop-codex-stop-hook.sh" cp "$GATE_SCRIPT" "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" +# rlcr-stop-gate sources hooks/lib/project-root.sh for PROJECT_ROOT resolution. +REAL_PROJECT_ROOT_LIB="$(dirname "$GATE_SCRIPT")/../hooks/lib/project-root.sh" +cp "$REAL_PROJECT_ROOT_LIB" "$T6_DIR/plugin/hooks/lib/project-root.sh" chmod +x "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" T6_INPUT_LOG="$T6_DIR/hook-input.json" @@ -243,6 +246,11 @@ T6_TRANSCRIPT="$T6_DIR/fake-transcript.jsonl" set +e ( cd "$T6_DIR" + # Pin CLAUDE_PROJECT_DIR so rlcr-stop-gate resolves a root even though + # the fixture is not a git repo. This test exercises the JSON-object- + # collapse regression for empty session_id; project-root resolution is + # orthogonal and must not short-circuit the gate with an ALLOW. + CLAUDE_PROJECT_DIR="$T6_DIR" \ MOCK_HOOK_INPUT_LOG="$T6_INPUT_LOG" \ "$T6_DIR/plugin/scripts/rlcr-stop-gate.sh" \ --transcript-path "$T6_TRANSCRIPT" \ From 3dcb4dd6663d61f2adb3d2f60fb8e018b9cb4a18 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 20:48:58 -0700 Subject: [PATCH 71/97] Canonicalize both sides of cancel-authorization path comparison is_cancel_authorized previously compared the user-provided mv source and destination (lowercased command text) against a lowercase-only prefix derived from active_loop_dir. Now that resolve_project_root canonicalizes via realpath, a user command that uses the non-canonical form of the prefix (e.g. a symlinked ancestor) would fail authorization even when every other check passes. Canonicalize both the loop dir and the parsed src/dest through canonicalize_path, then re-lowercase, so the comparison is symmetric across symlinked prefixes. Extend the symlink-rejection check to use the canonical loop dir so the on-disk probe targets the real path. 
Also guard the merged-config load in loop-common.sh: when the library is sourced from .bashrc/.zshrc in a non-repo directory, project root resolution yields an empty string and load_merged_config would emit its usage error to stderr on every shell startup. Skip the call when the resolved root is empty and leave the cached config empty. Add a regression test that points a symlinked prefix at the real loop dir and confirms the helper still authorizes the cancel mv. --- hooks/lib/loop-common.sh | 59 ++++++++++++++++++++++++++------ tests/test-cancel-signal-file.sh | 31 +++++++++++++++++ 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 263501cb..ec4497bb 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -190,7 +190,16 @@ _LOOP_COMMON_PROJECT_ROOT="$(resolve_project_root 2>/dev/null || true)" # Config loading is best-effort: use || true so a config-load failure does not # abort sourcing before callers' dependency checks (jq, codex) are reached. # Stderr is NOT suppressed so malformed config warnings remain visible. -_LOOP_COMMON_CONFIG="$(load_merged_config "$LOOP_COMMON_PLUGIN_ROOT" "$_LOOP_COMMON_PROJECT_ROOT")" || true +# +# Skip config loading when no project root is available (e.g. humanize.sh is +# sourced from .bashrc/.zshrc in a non-repo directory like $HOME). Passing an +# empty project_root to load_merged_config would surface a usage error on +# stderr every time the shell starts. 
+if [[ -n "$_LOOP_COMMON_PROJECT_ROOT" ]]; then + _LOOP_COMMON_CONFIG="$(load_merged_config "$LOOP_COMMON_PLUGIN_ROOT" "$_LOOP_COMMON_PROJECT_ROOT")" || true +else + _LOOP_COMMON_CONFIG="" +fi # Load bitlesson model from merged config (controls which CLI bitlesson-select.sh uses) DEFAULT_BITLESSON_MODEL="$(get_config_value "$_LOOP_COMMON_CONFIG" "bitlesson_model" 2>/dev/null || true)" @@ -1070,10 +1079,20 @@ is_cancel_authorized() { return 4 fi + # Canonicalize the loop dir (idempotent: resolve_project_root already + # canonicalizes, but callers may supply a non-canonical override). Both + # sides of the upcoming string comparisons must be canonicalized through + # the same transformation or a symlinked prefix in the user's command + # (e.g. /var/... vs /private/var/... on macOS) will spuriously fail the + # authorization check. + local canonical_loop_dir + canonical_loop_dir="$(canonicalize_path "${active_loop_dir%/}")" + canonical_loop_dir="${canonical_loop_dir:-${active_loop_dir%/}}" + # Normalize: Replace $loop_dir and ${loop_dir} with actual path local normalized="$command_lower" local loop_dir_lower - loop_dir_lower="${active_loop_dir%/}/" + loop_dir_lower="${canonical_loop_dir}/" loop_dir_lower=$(echo "$loop_dir_lower" | tr '[:upper:]' '[:lower:]') normalized="${normalized//\$\{loop_dir\}/$loop_dir_lower}" @@ -1169,32 +1188,50 @@ is_cancel_authorized() { return 5 fi - # Normalize and validate source path + # Normalize and validate source path. + # + # Canonicalize the user-provided path so a symlinked prefix in the caller's + # command (e.g. /Users/x vs /private/Users/x on macOS, or /var vs + # /private/var) matches canonical_loop_dir resolved via resolve_project_root. + # Re-lowercase after canonicalization because realpath on case-insensitive + # filesystems may restore the original casing of path components, which + # would diverge from the already-lowercased expected_* values. 
src=$(_normalize_path "$src") + local src_canonical + src_canonical="$(canonicalize_path "$src")" + src_canonical="${src_canonical:-$src}" + src_canonical=$(echo "$src_canonical" | tr '[:upper:]' '[:lower:]') local expected_src_state="${loop_dir_lower}state.md" local expected_src_finalize="${loop_dir_lower}finalize-state.md" local expected_src_methodology="${loop_dir_lower}methodology-analysis-state.md" - if [[ "$src" != "$expected_src_state" ]] && [[ "$src" != "$expected_src_finalize" ]] && [[ "$src" != "$expected_src_methodology" ]]; then + if [[ "$src_canonical" != "$expected_src_state" ]] && [[ "$src_canonical" != "$expected_src_finalize" ]] && [[ "$src_canonical" != "$expected_src_methodology" ]]; then return 5 fi - # Normalize and validate destination path + # Normalize and validate destination path (same canonicalize+lowercase + # transformation as source; see src comment above for rationale). dest=$(_normalize_path "$dest") + local dest_canonical + dest_canonical="$(canonicalize_path "$dest")" + dest_canonical="${dest_canonical:-$dest}" + dest_canonical=$(echo "$dest_canonical" | tr '[:upper:]' '[:lower:]') local expected_dest="${loop_dir_lower}cancel-state.md" - if [[ "$dest" != "$expected_dest" ]]; then + if [[ "$dest_canonical" != "$expected_dest" ]]; then return 5 fi # SECURITY: Reject if source file is a symlink (filesystem check) # Determine source file by comparing against expected paths (not substring match) # This avoids vulnerability when loop directory path contains "finalize" or "methodology" + # Use canonical_loop_dir so the symlink check runs against the real on-disk + # path rather than a user-supplied non-canonical form. 
local src_original - if [[ "$src" == "$expected_src_methodology" ]]; then - src_original="${active_loop_dir}/methodology-analysis-state.md" - elif [[ "$src" == "$expected_src_finalize" ]]; then - src_original="${active_loop_dir}/finalize-state.md" + if [[ "$src_canonical" == "$expected_src_methodology" ]]; then + src_original="${canonical_loop_dir}/methodology-analysis-state.md" + elif [[ "$src_canonical" == "$expected_src_finalize" ]]; then + src_original="${canonical_loop_dir}/finalize-state.md" else - src_original="${active_loop_dir}/state.md" + src_original="${canonical_loop_dir}/state.md" fi if [[ -L "$src_original" ]]; then return 6 # Source is a symlink diff --git a/tests/test-cancel-signal-file.sh b/tests/test-cancel-signal-file.sh index 9eb78d56..7d0ca8b9 100755 --- a/tests/test-cancel-signal-file.sh +++ b/tests/test-cancel-signal-file.sh @@ -1342,6 +1342,37 @@ else pass "is_cancel_authorized rejects hidden variables" fi +echo "HELPER TEST 8: is_cancel_authorized accepts symlinked-prefix path" +# Regression test: when the user supplies the active-loop path through a +# symlinked prefix (e.g. /var/... on macOS resolves to /private/var/...), +# the authorization check must canonicalize both sides so it still matches. +# We simulate the scenario by creating an all-lowercase sibling layout +# (mktemp dirs contain mixed case, which would defeat realpath once the +# command is lowercased on case-sensitive filesystems), then symlinking +# from there back to the real loop dir. +setup_test_loop "helper-8" +touch "$LOOP_DIR/.cancel-requested" + +SYMLINK_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/humanize-symlink-XXXXXXXX" | tr '[:upper:]' '[:lower:]') +# mktemp already lowercases when we pipe it; re-run if the resulting dir does +# not actually exist (shouldn't happen but defensive for portability). 
+[[ -d "$SYMLINK_ROOT" ]] || { rm -rf "$SYMLINK_ROOT" 2>/dev/null; SYMLINK_ROOT="${TMPDIR:-/tmp}/humanize-symlink-lowercase-$$"; mkdir -p "$SYMLINK_ROOT"; } + +SYMLINK_LOOP_DIR="$SYMLINK_ROOT/via-symlink" +ln -sfn "$LOOP_DIR" "$SYMLINK_LOOP_DIR" + +CANONICAL_LOOP_DIR="$(cd "$LOOP_DIR" && pwd -P)" +COMMAND_LOWER="mv ${SYMLINK_LOOP_DIR}/state.md ${SYMLINK_LOOP_DIR}/cancel-state.md" +COMMAND_LOWER=$(to_lower "$COMMAND_LOWER") + +if is_cancel_authorized "$CANONICAL_LOOP_DIR" "$COMMAND_LOWER"; then + pass "is_cancel_authorized accepts symlinked-prefix path after realpath" +else + fail "helper symlink prefix" "returns 0 (authorized)" "returns non-zero" +fi + +rm -rf "$SYMLINK_ROOT" 2>/dev/null || true + # ======================================== # Summary # ======================================== From b10fbf1f61ff54c8b99c9b07317d995df42a4187 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 21:54:50 -0700 Subject: [PATCH 72/97] Reject symlink aliases for cancel source and destination Using realpath-with-leaf-dereference on the user-provided mv source and destination lets a symlink aliasing /state.md or /cancel-state.md pass authorization. For the destination case the vulnerability is exploitable: `mv /state.md /tmp/link` (where /tmp/link -> /cancel-state.md) canonicalizes to the expected path and passes the check, but `mv` replaces the link at /tmp/link rather than creating /cancel-state.md, corrupting loop state and depositing state.md outside the loop dir. Introduce canonicalize_path_prefix in project-root.sh that resolves symlinks ONLY in the parent directory and preserves the basename verbatim. Symlinked project prefixes (e.g. /var vs /private/var) still match a canonical expected path, but a symlink at the leaf no longer impersonates the real filename. Rewire is_cancel_authorized to use the prefix-only helper for both src and dest, and document why the distinction matters in-place. 
Update the on-disk src_original probe to reference canonical_loop_dir so the symlink-rejection check runs against the real path rather than any user-supplied non-canonical form. Add two regression tests covering the destination and source symlink alias attacks. --- hooks/lib/loop-common.sh | 23 ++++++++++----- hooks/lib/project-root.sh | 48 ++++++++++++++++++++++++++++++++ tests/test-cancel-signal-file.sh | 45 ++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 7 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index ec4497bb..3e54e6ef 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1190,15 +1190,21 @@ is_cancel_authorized() { # Normalize and validate source path. # - # Canonicalize the user-provided path so a symlinked prefix in the caller's - # command (e.g. /Users/x vs /private/Users/x on macOS, or /var vs - # /private/var) matches canonical_loop_dir resolved via resolve_project_root. + # Use canonicalize_path_prefix (NOT canonicalize_path): we need to resolve + # symlinks in the parent directory so a symlinked project prefix matches + # canonical_loop_dir, but we MUST NOT dereference a symlink at the leaf. + # Otherwise a symlink like /tmp/alias -> /state.md would canonicalize + # to /state.md and pass the check, but `mv` would then operate on + # the link path itself, escaping the loop directory and/or corrupting + # loop state. The on-disk symlink rejection below (src_original check) + # still fires because it probes the real state.md under canonical_loop_dir. + # # Re-lowercase after canonicalization because realpath on case-insensitive # filesystems may restore the original casing of path components, which # would diverge from the already-lowercased expected_* values. 
src=$(_normalize_path "$src") local src_canonical - src_canonical="$(canonicalize_path "$src")" + src_canonical="$(canonicalize_path_prefix "$src")" src_canonical="${src_canonical:-$src}" src_canonical=$(echo "$src_canonical" | tr '[:upper:]' '[:lower:]') local expected_src_state="${loop_dir_lower}state.md" @@ -1208,11 +1214,14 @@ is_cancel_authorized() { return 5 fi - # Normalize and validate destination path (same canonicalize+lowercase - # transformation as source; see src comment above for rationale). + # Normalize and validate destination path. Uses canonicalize_path_prefix + # for the same reason as src: a symlink alias pointing at the real + # cancel-state.md must NOT pass authorization, because `mv` onto a + # symlink replaces the link rather than creating /cancel-state.md, + # corrupting loop state and moving state.md outside the loop dir. dest=$(_normalize_path "$dest") local dest_canonical - dest_canonical="$(canonicalize_path "$dest")" + dest_canonical="$(canonicalize_path_prefix "$dest")" dest_canonical="${dest_canonical:-$dest}" dest_canonical=$(echo "$dest_canonical" | tr '[:upper:]' '[:lower:]') local expected_dest="${loop_dir_lower}cancel-state.md" diff --git a/hooks/lib/project-root.sh b/hooks/lib/project-root.sh index 3887788b..cb23403a 100644 --- a/hooks/lib/project-root.sh +++ b/hooks/lib/project-root.sh @@ -52,6 +52,49 @@ resolve_project_root() { printf '%s\n' "${canonical:-$root}" } +# canonicalize_path_prefix +# +# Resolves symlinks ONLY in the parent directory and reattaches the +# original basename verbatim. This is the right helper for comparing +# user-supplied filenames against an expected path inside a known +# directory: a symlink at /tmp/alias pointing at /real/loop/state.md +# MUST NOT canonicalize to /real/loop/state.md for comparison purposes, +# because `mv` operates on the link path itself. Resolving only the +# parent still lets a symlinked project prefix (e.g. /var vs /private/var +# on macOS) match a canonical expected path. 
+# +# If realpath on the parent fails, falls back to returning the input +# path unchanged (prefix cannot be canonicalized -> caller's comparison +# will correctly fail against a canonical expected path). +# +# Empty input prints nothing and returns 0. +# +canonicalize_path_prefix() { + local path="$1" + if [[ -z "$path" ]]; then + return 0 + fi + + local parent base parent_real + parent=$(dirname -- "$path") + base=$(basename -- "$path") + + if parent_real=$(realpath "$parent" 2>/dev/null) && [[ -n "$parent_real" ]]; then + printf '%s/%s\n' "${parent_real%/}" "$base" + return 0 + fi + + if command -v python3 >/dev/null 2>&1; then + parent_real=$(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$parent" 2>/dev/null || true) + if [[ -n "$parent_real" ]]; then + printf '%s/%s\n' "${parent_real%/}" "$base" + return 0 + fi + fi + + printf '%s\n' "$path" +} + # canonicalize_path # # Prints the realpath of the input path. If the path itself does not @@ -60,6 +103,11 @@ resolve_project_root() { # If realpath is unavailable and python3 is missing, prints the input # path verbatim. # +# SECURITY NOTE: This helper dereferences symlinks at the leaf when +# the leaf exists. Do NOT use it to authorize a user-supplied path +# against an expected filename -- use canonicalize_path_prefix instead, +# which only resolves the parent. +# # Empty input prints nothing and returns 0. # canonicalize_path() { diff --git a/tests/test-cancel-signal-file.sh b/tests/test-cancel-signal-file.sh index 7d0ca8b9..420c5d5d 100755 --- a/tests/test-cancel-signal-file.sh +++ b/tests/test-cancel-signal-file.sh @@ -1373,6 +1373,51 @@ fi rm -rf "$SYMLINK_ROOT" 2>/dev/null || true +echo "HELPER TEST 9: is_cancel_authorized rejects destination symlink alias" +# Regression test for a P1 security issue: if the destination argument is a +# symlink that points at /cancel-state.md, canonicalizing the full +# path (leaf dereferenced) would let the alias pass authorization. 
`mv` +# would then operate on the link path itself, corrupting loop state and +# leaking state.md contents outside the loop dir. The fix resolves symlinks +# only in the parent directory and preserves the basename verbatim. +setup_test_loop "helper-9" +touch "$LOOP_DIR/.cancel-requested" +# Create the target file so the symlink would resolve if the prefix-only +# canonicalizer were relaxed back to full canonicalization. +touch "$LOOP_DIR/cancel-state.md" +ln -sfn "$LOOP_DIR/cancel-state.md" "$TEST_DIR/dest-alias" + +COMMAND_LOWER="mv ${LOOP_DIR}/state.md ${TEST_DIR}/dest-alias" +COMMAND_LOWER=$(to_lower "$COMMAND_LOWER") + +if is_cancel_authorized "$LOOP_DIR" "$COMMAND_LOWER"; then + fail "helper dest symlink alias" "returns non-zero (rejected)" "returns 0 (authorized)" +else + pass "is_cancel_authorized rejects destination symlink alias" +fi +rm -f "$TEST_DIR/dest-alias" "$LOOP_DIR/cancel-state.md" + +echo "HELPER TEST 10: is_cancel_authorized rejects source symlink alias" +# Regression test for a P1 security issue: if the source argument is a +# symlink aliasing /state.md, dereferencing the leaf would let it +# pass authorization. The on-disk symlink check (src_original) below +# would still catch this specific case because it probes the real path, +# but we defend in depth: the path comparison must reject the alias on +# its own. 
+setup_test_loop "helper-10" +touch "$LOOP_DIR/.cancel-requested" +ln -sfn "$LOOP_DIR/state.md" "$TEST_DIR/src-alias" + +COMMAND_LOWER="mv ${TEST_DIR}/src-alias ${LOOP_DIR}/cancel-state.md" +COMMAND_LOWER=$(to_lower "$COMMAND_LOWER") + +if is_cancel_authorized "$LOOP_DIR" "$COMMAND_LOWER"; then + fail "helper src symlink alias" "returns non-zero (rejected)" "returns 0 (authorized)" +else + pass "is_cancel_authorized rejects source symlink alias" +fi +rm -f "$TEST_DIR/src-alias" + # ======================================== # Summary # ======================================== From cf17140050c4e063f27924c2d56cc2279d81f4cd Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 16 Apr 2026 22:01:08 -0700 Subject: [PATCH 73/97] Use prefix-only canonicalization in read/write path validators The main FILE_PATH vs CORRECT_PATH comparison in loop-read-validator.sh and loop-write-validator.sh previously used canonicalize_path, which dereferences symlinks at the leaf. A planted symlink at the correct filename inside the loop dir would then canonicalize to its target and let the validator approve a Read or Write that follows the link out of the loop dir, expanding Claude's effective file-access reach beyond what the hook intends to permit. Switch both validators to canonicalize_path_prefix so a symlinked project ancestor still resolves correctly (the original bug this feature fixes) while a symlink at the leaf no longer impersonates the expected filename. This matches the same discipline applied to is_cancel_authorized and preserves the intended semantics that the basename is compared verbatim. Methodology-analysis checks in these validators intentionally keep full-path realpath because they use prefix-containment (path starts with loop-dir/) rather than equality, which correctly catches symlinks escaping the loop dir. 
--- hooks/loop-read-validator.sh | 10 ++++++---- hooks/loop-write-validator.sh | 16 +++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 5a27fc4f..b812288a 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -304,10 +304,12 @@ fi CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" -# Compare canonical (symlink-resolved) forms -- see loop-write-validator.sh -# for the rationale; the same reasoning applies to read paths. -_READ_FILE_REAL=$(canonicalize_path "$FILE_PATH") -_READ_CORRECT_REAL=$(canonicalize_path "$CORRECT_PATH") +# Compare prefix-canonical forms -- see loop-write-validator.sh for the +# rationale; the same reasoning applies to read paths. A planted symlink +# at the leaf would otherwise let a Read follow the link outside the loop +# dir and still pass this validator. +_READ_FILE_REAL=$(canonicalize_path_prefix "$FILE_PATH") +_READ_CORRECT_REAL=$(canonicalize_path_prefix "$CORRECT_PATH") if [[ "${_READ_FILE_REAL:-$FILE_PATH}" != "${_READ_CORRECT_REAL:-$CORRECT_PATH}" ]]; then FALLBACK="# Wrong Directory Path diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 9f46c102..eac0744c 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -331,13 +331,15 @@ fi CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" -# Compare canonical (symlink-resolved) forms so the check is not fooled by -# equivalent paths expressed in different prefix forms (e.g. /var/... vs -# /private/var/... on macOS). A raw string compare would mis-handle a -# symlinked project prefix whenever one side was canonicalized upstream -# (e.g. by resolve_project_root) and the other was not. -_WRITE_FILE_REAL=$(canonicalize_path "$FILE_PATH") -_WRITE_CORRECT_REAL=$(canonicalize_path "$CORRECT_PATH") +# Compare prefix-canonical forms so the check is not fooled by equivalent +# paths expressed in different ancestor forms (e.g. 
/var/... vs /private/var/... +# on macOS) -- without dereferencing the leaf. Using full realpath here +# would let a planted symlink at / pointing outside +# the loop dir approve a write through the link, escalating Claude's write +# reach beyond the loop dir. canonicalize_path_prefix resolves the parent +# directory only; the basename is compared verbatim. +_WRITE_FILE_REAL=$(canonicalize_path_prefix "$FILE_PATH") +_WRITE_CORRECT_REAL=$(canonicalize_path_prefix "$CORRECT_PATH") if [[ "${_WRITE_FILE_REAL:-$FILE_PATH}" != "${_WRITE_CORRECT_REAL:-$CORRECT_PATH}" ]]; then FALLBACK="# Wrong Directory Path From 68bde283b55486ccbb5111a8186cdfa43475ae78 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 09:48:26 -0700 Subject: [PATCH 74/97] Tighten cancel-rlcr-loop allowlist regex in methodology phase The optional path-prefix group in the allowlist regex used [^"]* which matched whitespace, letting any command ending in the literal token cancel-rlcr-loop.sh short-circuit the read-only guard. Payloads such as bash cancel-rlcr-loop.sh, touch evil.txt cancel-rlcr-loop.sh, or tee cancel-rlcr-loop.sh passed the allowlist and never reached the downstream interpreter, shell, file-modification, and redirection bans. Restrict the prefix to a single whitespace-free token and require the script name to be followed by whitespace or end-of-line, so only the intended forms cancel-rlcr-loop.sh, ./path/cancel-rlcr-loop.sh, and /abs/cancel-rlcr-loop.sh (optionally quoted) are allowed. 
--- hooks/loop-bash-validator.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 3616f3d7..85af0580 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -80,8 +80,13 @@ _MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then # Allow cancel-rlcr-loop.sh only as the leading command (not as an argument - # to another command like cp/mv). Reject if chained with shell operators. - if echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*("?[^"]*/?)?cancel-rlcr-loop\.sh' && \ + # to another command like cp/mv). The optional path prefix must be a single + # token with no embedded whitespace, otherwise commands like + # `bash cancel-rlcr-loop.sh` or `tee cancel-rlcr-loop.sh` would match. + # The script name must be followed by whitespace or end-of-line so trailing + # tokens cannot hide additional arguments. Reject if chained with shell + # operators. + if echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*"?([^[:space:]"]+/)?cancel-rlcr-loop\.sh"?([[:space:]]|$)' && \ ! echo "$COMMAND_LOWER" | grep -qE '[;|&]'; then exit 0 fi From 47ee97db62653a30cc6edd84ccb91c06aad01961 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 13:40:45 -0700 Subject: [PATCH 75/97] Reject unresolvable symlinks in methodology write/edit validators The realpath-unavailable fallback in loop-write-validator.sh and loop-edit-validator.sh kept the raw path for the loop-dir prefix check. A symlink planted at /methodology-analysis-report.md (or ...-done.md) could satisfy the string-prefix match while pointing at a target outside the loop dir; the basename allowlist would then approve writes or edits to arbitrary files during methodology-analysis mode, bypassing the post-signoff write lock. 
Mirror the symlink rejection already used by the read validator: if the leaf is a symlink we cannot canonicalize, fail closed with an explicit block message. The branch only fires when realpath is missing, matching the targeted BSD/macOS fallback surface. --- hooks/loop-edit-validator.sh | 10 ++++++++++ hooks/loop-write-validator.sh | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 32b3bbe9..fb9f8e1b 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -85,6 +85,16 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m Path contains traversal segments that cannot be resolved without realpath." >&2 exit 2 fi + # Fail closed if the leaf is a symlink we cannot resolve; the raw + # path would satisfy the loop-dir prefix check while pointing at a + # target outside the loop, letting the basename allowlist approve + # edits to arbitrary files during methodology-analysis mode. + if [[ -L "$FILE_PATH" ]]; then + echo "# Edit Blocked During Methodology Analysis + +Path is a symlink that cannot be resolved without realpath." >&2 + exit 2 + fi if [[ "$FILE_PATH" == /* ]]; then _ma_real_path="$FILE_PATH" else diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index eac0744c..1d8f1e31 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -104,6 +104,16 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m Path contains traversal segments that cannot be resolved without realpath." >&2 exit 2 fi + # Fail closed if the leaf is a symlink we cannot resolve; the raw + # path would satisfy the loop-dir prefix check while pointing at a + # target outside the loop, letting the basename allowlist approve + # writes to arbitrary files during methodology-analysis mode. 
+ if [[ -L "$FILE_PATH" ]]; then + echo "# Write Blocked During Methodology Analysis + +Path is a symlink that cannot be resolved without realpath." >&2 + exit 2 + fi if [[ "$FILE_PATH" == /* ]]; then _ma_real_path="$FILE_PATH" else From a4fbccd2ce229f611b48417a871b4704dcb348cd Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 13:43:53 -0700 Subject: [PATCH 76/97] Remove version bump hint from CLAUDE.md --- .claude/CLAUDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 976f54fd..3298b26c 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -3,7 +3,7 @@ This is a Claude Code plugin that provides iterative development with Codex revi # Humanize Project Rules - Everything about this project, including but not limited to implementations, comments, tests and documentations should be in English. No Emoji or CJK char is allowed. -- If under `main` branch, every commit MUST include a version bump in `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` and `README.md` (the "Current Version" line). If not under `main` branch, please make sure that the current branch's `version` in those three files has a incremental update compared to that of `main` branch. The `version` must be identical in those three files. +- If version bump is required, please bump them in three files: `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` and `README.md` (the "Current Version" line). - Version number must be in format of `X.Y.Z` where X/Y/Z is numeric number. Version MUST NOT include anything other than `X.Y.Z`. For example, a good version is `9.732.42`; Bad version examples (MUST NOT USE): `3.22.7-alpha` (extra "-alpha" string), `9.77.2 (2026-01-07)` (useless date/timestamp). - The plan template in `commands/gen-plan.md` (Phase 5 Plan Structure section) and `prompt-template/plan/gen-plan-template.md` are intentionally kept in sync. 
When modifying either file, ensure both are updated to maintain consistency. - Conversely, changes to `prompt-template/plan/gen-plan-template.md` must also be reflected in the Plan Structure section of `commands/gen-plan.md`. From 22e52bd9f6488b7064a1118c37c9a9b8599b8936 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 13:59:39 -0700 Subject: [PATCH 77/97] Wire test-disable-nested-codex-hooks into run-all-tests The nested-hook regression test was added to tests/ but never registered in run-all-tests.sh TEST_SUITES, so CI's main entrypoint skipped the --disable codex_hooks coverage. Regressions in nested Codex invocation handling could ship without tripping any check. Fixes also required to make the test actually pass once wired in: - Mock codex now answers `--help` with output that advertises `--disable`, matching the feature probe in loop-codex-stop-hook.sh. Without this, the probe cached "no support" and the hook emitted argv without the guard flag, so the assertions failed. - Set executable bit so run-all-tests.sh can dispatch the script. 
--- tests/run-all-tests.sh | 1 + tests/test-disable-nested-codex-hooks.sh | 13 +++++++++++++ 2 files changed, 14 insertions(+) mode change 100644 => 100755 tests/test-disable-nested-codex-hooks.sh diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index a39d9ab1..00373b45 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -87,6 +87,7 @@ TEST_SUITES=( "test-config-error-handling.sh" "test-codex-hook-install.sh" "test-unified-codex-config.sh" + "test-disable-nested-codex-hooks.sh" # Session ID and Agent Teams tests "test-session-id.sh" "test-agent-teams.sh" diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh old mode 100644 new mode 100755 index 3d70c870..90a1de83 --- a/tests/test-disable-nested-codex-hooks.sh +++ b/tests/test-disable-nested-codex-hooks.sh @@ -70,6 +70,19 @@ setup_mock_codex() { mkdir -p "$bin_dir" cat > "$bin_dir/codex" < + +Options: + --disable Disable a specific Codex hook (e.g. codex_hooks) + --skip-git-repo-check Skip git repo validation +HELP + exit 0 +fi + printf '%s\n' "\$*" > "$args_file" subcommand="" From 0ca864d51675236589c1b86f62f394c3b1cae700 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 14:04:36 -0700 Subject: [PATCH 78/97] Place --disable codex_hooks after the Codex subcommand The Codex CLI documents that global flags for a subcommand must be passed after the subcommand so they apply as intended. The stop hook was invoking nested reviewer calls as: codex --disable codex_hooks exec ... codex --disable codex_hooks review ... With the flag in front of the subcommand, Codex may ignore it, which re-enables the native Stop hook inside the nested invocation and reintroduces the Stop-hook recursion under RLCR. Move CODEX_DISABLE_HOOKS_ARGS after the exec/review subcommand in both the executed command and the logged command-line trace. Update the regression test to assert the new argv order. 
--- hooks/loop-codex-stop-hook.sh | 8 ++++---- tests/test-disable-nested-codex-hooks.sh | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index f96ffd53..cc481c39 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1218,14 +1218,14 @@ Provider: codex echo "# Review base ($review_base_type): $review_base" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex ${CODEX_DISABLE_HOOKS_ARGS[*]} review --base $review_base ${CODEX_REVIEW_ARGS[*]}" + echo "codex review ${CODEX_DISABLE_HOOKS_ARGS[*]} --base $review_base ${CODEX_REVIEW_ARGS[*]}" } > "$CODEX_REVIEW_CMD_FILE" echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2 echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." >&2 CODEX_REVIEW_EXIT_CODE=0 - (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" review --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ + (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex review "${CODEX_DISABLE_HOOKS_ARGS[@]}" --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$? echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2 @@ -1644,7 +1644,7 @@ CODEX_PROMPT_CONTENT=$(cat "$REVIEW_PROMPT_FILE") echo "# Working directory: $PROJECT_ROOT" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex ${CODEX_DISABLE_HOOKS_ARGS[*]} exec ${CODEX_EXEC_ARGS[*]} \"\"" + echo "codex exec ${CODEX_DISABLE_HOOKS_ARGS[*]} ${CODEX_EXEC_ARGS[*]} \"\"" echo "" echo "# Prompt content:" echo "$CODEX_PROMPT_CONTENT" @@ -1654,7 +1654,7 @@ echo "Codex command saved to: $CODEX_CMD_FILE" >&2 echo "Running summary review with timeout ${CODEX_TIMEOUT}s..." 
>&2 CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_EXEC_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_DISABLE_HOOKS_ARGS[@]}" "${CODEX_EXEC_ARGS[@]}" - \ > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$? echo "Codex exit code: $CODEX_EXIT_CODE" >&2 diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh index 90a1de83..7178052f 100755 --- a/tests/test-disable-nested-codex-hooks.sh +++ b/tests/test-disable-nested-codex-hooks.sh @@ -188,22 +188,22 @@ REPO_IMPL="$TEST_DIR/repo-impl" setup_repo "$REPO_IMPL" run_loop_hook "$REPO_IMPL" "$TEST_DIR/impl.args" "false" -if grep -q -- '--disable codex_hooks exec' "$TEST_DIR/impl.args"; then +if grep -q -- 'exec --disable codex_hooks' "$TEST_DIR/impl.args"; then pass "implementation-phase stop hook disables codex_hooks for codex exec" else fail "implementation-phase stop hook disables codex_hooks for codex exec" \ - "--disable codex_hooks exec" "$(cat "$TEST_DIR/impl.args" 2>/dev/null || echo missing)" + "exec --disable codex_hooks" "$(cat "$TEST_DIR/impl.args" 2>/dev/null || echo missing)" fi REPO_REVIEW="$TEST_DIR/repo-review" setup_repo "$REPO_REVIEW" run_loop_hook "$REPO_REVIEW" "$TEST_DIR/review.args" "true" -if grep -q -- '--disable codex_hooks review' "$TEST_DIR/review.args"; then +if grep -q -- 'review --disable codex_hooks' "$TEST_DIR/review.args"; then pass "review-phase stop hook disables codex_hooks for codex review" else fail "review-phase stop hook disables codex_hooks for codex review" \ - "--disable codex_hooks review" "$(cat "$TEST_DIR/review.args" 2>/dev/null || echo missing)" + "review --disable codex_hooks" "$(cat "$TEST_DIR/review.args" 2>/dev/null || echo missing)" fi echo "" From cd367ee1ffdc0f76dd90a15a37577c3aca68add8 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 
14:50:41 -0700 Subject: [PATCH 79/97] Reject shell metacharacters in cancel allowlist exception The methodology-phase cancel exception returned early for any command beginning with cancel-rlcr-loop.sh that did not contain `;`, `|`, or `&`. Payloads such as cancel-rlcr-loop.sh $(touch /tmp/pwn) cancel-rlcr-loop.sh \`touch /tmp/pwn\` cancel-rlcr-loop.sh > /tmp/pwn cancel-rlcr-loop.sh\nrm -rf / still satisfied the narrow metachar check and short-circuited past the downstream file-mod, redirection, and interpreter blockers, effectively letting arbitrary write operations ride alongside the cancel invocation. Extend the reject list to include command substitution opener `$(`, backticks, redirection (`<`, `>`), and embedded newlines, and switch from `echo | grep` to a bash case expression so a newline inside the command text does not bypass the single-line grep semantics. Preserve `${CLAUDE_PLUGIN_ROOT}` variable expansion (which does not use `$(`) so the slash-command cancel path continues to work. --- hooks/loop-bash-validator.sh | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 85af0580..647b4b6b 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -84,10 +84,23 @@ if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.m # token with no embedded whitespace, otherwise commands like # `bash cancel-rlcr-loop.sh` or `tee cancel-rlcr-loop.sh` would match. # The script name must be followed by whitespace or end-of-line so trailing - # tokens cannot hide additional arguments. Reject if chained with shell - # operators. - if echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*"?([^[:space:]"]+/)?cancel-rlcr-loop\.sh"?([[:space:]]|$)' && \ - ! echo "$COMMAND_LOWER" | grep -qE '[;|&]'; then + # tokens cannot hide additional arguments. 
+ # + # Also reject any shell metacharacter that can inject or redirect work + # after the cancel invocation: pipes/sequence/background operators, + # command substitution ($(...) or backticks), redirection (<, >), and + # multi-line payloads. The earlier narrower check only rejected ; | &, + # letting payloads like `cancel-rlcr-loop.sh $(touch /tmp/pwn)` or a + # newline-delimited second command slip past this early exit and reach + # arbitrary file modifications before the downstream blockers run. + _ma_has_shell_meta=false + case "$COMMAND_LOWER" in + *';'*|*'|'*|*'&'*|*'`'*|*'>'*|*'<'*|*'$('*|*$'\n'*) + _ma_has_shell_meta=true + ;; + esac + if [[ "$_ma_has_shell_meta" != "true" ]] && \ + echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*"?([^[:space:]"]+/)?cancel-rlcr-loop\.sh"?([[:space:]]|$)'; then exit 0 fi # Block git commands that modify the working tree From d09a28269142297cd70f6a23790f1a18367a65d6 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 15:19:05 -0700 Subject: [PATCH 80/97] Block patch in methodology-phase Bash denylist The methodology-phase file-modification regex enumerates common write utilities (tee, mv, cp, rm, ...) but omits `patch`, so commands like patch -p0 < /tmp/diff passed the denylist and could apply arbitrary edits to tracked source files after Codex sign-off. Because methodology completion can exit without another review pass, any changes written via patch would ship unreviewed. Add `patch` to the word-boundary-anchored alternation so it is rejected alongside the other file mutators. The anchor keeps unrelated names like `gpatch` unaffected. 
--- hooks/loop-bash-validator.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 647b4b6b..ede35304 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -110,8 +110,8 @@ if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.m Git write commands are not allowed during the methodology analysis phase." >&2 exit 2 fi - # Block file manipulation commands (touch, mv, cp, rm, mkdir, ln, etc.) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown|mkdir|rmdir|ln|mktemp)[[:space:]]'; then + # Block file manipulation commands (touch, mv, cp, rm, mkdir, ln, patch, etc.) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown|mkdir|rmdir|ln|mktemp|patch)[[:space:]]'; then echo "# Bash Blocked During Methodology Analysis File modification commands are not allowed during the methodology analysis phase." >&2 From 5990827beabcfdbee6d917ebd0177984a462dd22 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Fri, 17 Apr 2026 15:19:15 -0700 Subject: [PATCH 81/97] Gate methodology completion on a clean git tree When methodology-analysis-state.md is active, the stop hook returned exit 0 as soon as complete_methodology_analysis succeeded, short- circuiting past the main git-clean gate. Tracked source edits made during the analysis phase (for example, post-Codex-signoff modifications) could slip through unreviewed the moment the completion marker appeared. Re-run the existing git-clean validation inside the methodology completion branch before allowing the terminal exit. Reuse the HUMANIZE_UNTRACKED_PATTERN filter from the main gate so legitimate .humanize/rlcr/... methodology artifacts are not themselves treated as dirty. If the filtered status is non-empty, block with the same git-not-clean template the main gate uses. 
--- hooks/loop-codex-stop-hook.sh | 40 ++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index cc481c39..0c191d4c 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -616,7 +616,45 @@ fi if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then if complete_methodology_analysis; then - # Analysis complete, allow exit + # Before allowing the terminal state transition, re-verify the + # working tree is clean. The main git-clean gate below is skipped + # in the methodology branch, so without this check, tracked edits + # made during the analysis phase (e.g. post-signoff source + # modifications) could slip through unreviewed as soon as the + # completion marker appears. + # + # Apply the same .humanize/ untracked exclusion the main gate uses + # so methodology-artifact writes under .humanize/rlcr/... do not + # themselves trip the check. + if [[ "$GIT_IS_REPO" == "true" ]]; then + HUMANIZE_UNTRACKED_PATTERN='^\?\? \.humanize[-/]' + GIT_STATUS_FOR_BLOCK=$(echo "$GIT_STATUS_CACHED" | grep -vE "$HUMANIZE_UNTRACKED_PATTERN" || true) + if [[ -n "$GIT_STATUS_FOR_BLOCK" ]]; then + cleanup_stale_index_lock + FALLBACK="# Git Not Clean + +Methodology analysis is complete, but the working tree still has uncommitted changes: + +{{GIT_ISSUES}} + +Please commit all changes before allowing the loop to exit. 
+{{SPECIAL_NOTES}}" + REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/git-not-clean.md" "$FALLBACK" \ + "GIT_ISSUES=uncommitted changes after methodology analysis" \ + "SPECIAL_NOTES=") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - uncommitted changes detected after methodology analysis, please commit first" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi + fi + # Analysis complete and tree clean, allow exit exit 0 else # Analysis not yet complete, block From 3a0cd7c5c333a1680f0a235fbb4c177d722e4b7b Mon Sep 17 00:00:00 2001 From: shinan6 Date: Mon, 20 Apr 2026 12:30:35 +0800 Subject: [PATCH 82/97] docs: add gen-idea design spec for directed-swarm idea drafting Introduces a design document for a new /humanize:gen-idea command that sits one step before gen-plan in the Humanize workflow. The command applies the directed-diversity insight from Anthropic's Automated W2S Researcher note: a lead picks N orthogonal directions and delegates one direction per Explore subagent, then synthesizes a repo-grounded draft suitable as gen-plan input. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../specs/2026-04-20-gen-idea-design.md | 210 ++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-20-gen-idea-design.md diff --git a/docs/superpowers/specs/2026-04-20-gen-idea-design.md b/docs/superpowers/specs/2026-04-20-gen-idea-design.md new file mode 100644 index 00000000..b2e69e10 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-gen-idea-design.md @@ -0,0 +1,210 @@ +# gen-idea — Directed-Swarm Idea Drafting (Design) + +## Context + +Humanize today starts at `gen-plan`, which takes a user-authored draft `.md` and produces a structured plan via a single Codex first-pass + a Claude/Codex convergence loop. The draft itself — the "most valuable human input" that `gen-plan` preserves verbatim — still has to be hand-authored. 
+ +This spec introduces a new command `/humanize:gen-idea` that sits one step earlier in the flow. It takes a loose idea (inline text or a `.md` of notes) and produces a repo-grounded draft suitable as `gen-plan`'s `--input`. + +The command borrows its core mechanic from the Anthropic alignment note *Automated W2S Researcher* (2026). That work showed that when nine agents received **different high-level directions** ("study data filtering", "study distillation", "study evolutionary search") they decisively outperformed nine agents given the same task description — directed diversity climbs faster than undirected replication. `gen-idea` applies the same insight to idea generation: instead of one LLM pass, a lead picks N orthogonal directions and delegates one direction per subagent, then synthesizes. + +## Goal + +One shippable command, `/humanize:gen-idea`, that: + +- Accepts a loose idea (inline text or `.md` path) and a desired direction count. +- Spawns N parallel read-only exploration subagents, each assigned a distinct direction. +- Writes a single draft `.md` that chooses one primary direction, lists alternatives, and grounds each in objective repo evidence. +- Produces output that passes through `gen-plan --input <draft>` unchanged. + +No Codex, no RLCR, no auto-chaining, no relevance check, no config-loader integration. Lightweight first pass — everything downstream already exists. + +## Out of Scope (First Pass) + +- Codex involvement in idea phase (delegated to downstream `gen-plan`). +- Relevance check against repo (delegated to `gen-plan` Phase 2). +- Config-loader integration (`.humanize/config.json` not read). +- Alternative-language translation variant. +- Auto-chain to `gen-plan`. +- Test harness, CI coverage, or telemetry. +- `--directions` override flag (directions are LLM-picked per topic). + +Each of these may land in a follow-up once the primary flow is proven.
+ +## Command Signature + +``` +/humanize:gen-idea <idea-text-or-path> [--n 6] [--output <path>] +``` + +**Input auto-detection**: if the positional arg resolves to an existing file AND ends in `.md`, it is read as file content; otherwise the arg is treated verbatim as inline idea text. + +**Parameters** + +- Positional (required): idea body as inline text or path to a `.md` file. Must be non-empty after parsing. +- `--n <int>` (optional, default `6`): direction count. Valid range `[2, 10]`. Out-of-range stops the command. +- `--output <path>` (optional): target draft path. Default `.humanize/ideas/<slug>-<timestamp>.md` relative to project root. + +**Slug construction** + +- File input → filename stem (extension removed). +- Inline input → lowercase first ~40 chars of the idea, strip non-alphanumeric (keep `-`), collapse dash runs, trim leading/trailing dashes. +- Empty result → fallback `idea`. + +**Path behavior** + +- For the **default** `--output`, the command auto-creates `.humanize/ideas/` if missing. This matches the implicit contract that Humanize owns its own `.humanize/` subtree. +- For a **user-supplied** `--output`, the parent directory must already exist. This mirrors `gen-plan`'s stance and avoids silently creating arbitrary directories. +- Output file must not already exist. Refuse to overwrite. + +## Architecture + +Five phases inside a single command file `commands/gen-idea.md`, strictly sequential: + +### Phase 0 — Parse Input + +Parse `$ARGUMENTS`. Set `IDEA_INPUT`, `N` (default 6), `OUTPUT_FILE` (default path if unset). + +### Phase 1 — IO Validation + +Call `scripts/validate-gen-idea-io.sh` with the resolved flags. The script: + +- Distinguishes inline vs file input. +- Writes inline text to a tempfile under `$TMPDIR` and prints its path (so downstream phases always consume a file, simplifying the command body). +- Verifies `--n` is an integer in `[2, 10]`. +- Creates the default output directory when the default path is used; rejects non-existent parent directory for user-supplied paths.
+- Refuses to overwrite an existing output file. +- Locates `prompt-template/idea/gen-idea-template.md`. + +Exit codes parallel `validate-gen-plan-io.sh` (distinct failures get distinct codes; `6` is "invalid arguments"; `7` is "template missing"). + +Script stdout contains `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` for the command to consume. + +### Phase 2 — Direction Generation + +One Claude pass. Inputs: the idea body, the repo README, the project `CLAUDE.md` (if any), and a top-level directory listing. Output: exactly `N` orthogonal directions, each with: + +- A short **name** (2–5 words). +- A **one-sentence rationale** explaining *why this angle is distinct from the others*. + +Orthogonality is the hard constraint — two near-duplicate directions defeat the W2S premise. The generation prompt names this explicitly and requires the model to flag and replace any near-duplicates before returning. + +### Phase 3 — Parallel Exploration + +Single Task-tool invocation block with N parallel `Explore` subagents. Each subagent receives: + +- The verbatim idea body. +- Its single assigned direction (name + rationale). +- Instruction to produce a structured mini-proposal with **objective evidence** — references to specific repo paths, existing patterns worth extending, measurable considerations (rough complexity, LOC surface, perf implications) where discoverable. Read-only; no writes. +- Explicit instruction to report "exploratory, no concrete precedent" verbatim if no evidence is found. Fabrication is forbidden. + +Each subagent returns a proposal block with fields: `APPROACH_SUMMARY`, `OBJECTIVE_EVIDENCE` (bullet list), `KNOWN_RISKS`, `CONFIDENCE` (`high` / `medium` / `low`). + +### Phase 4 — Synthesis & Write + +The Lead (main command body, same model context) reviews all returned proposals and: + +1. 
Picks the strongest direction as **primary**, factoring in: evidence density, fit with repo patterns, implementation surface area, and declared confidence. +2. Populates the template in this order: inferred title → `Original Idea` (verbatim copy of the idea body) → `Primary Direction` section (filled from the chosen proposal) → `Alternative Directions Considered` (each remaining direction in Alt-1..Alt-(N-1) order, with "Why not primary" line) → `Synthesis Notes` (which alt elements could fold into primary). +3. Writes the finalized draft to `OUTPUT_FILE` via `Write`. +4. Reports path + one-line summary to the user. + +## Draft Output Format + +Rendered from `prompt-template/idea/gen-idea-template.md`: + +```markdown +# + +## Original Idea + + +## Primary Direction: + +### Rationale + + +### Approach Summary + + +### Objective Evidence +- +- +- + +### Known Risks + + +## Alternative Directions Considered + +### Alt-1: +- Gist: +- Objective Evidence: + - +- Why not primary: + +### Alt-2 ... Alt-(N-1) + + +## Synthesis Notes + +``` + +Two invariants: + +1. `Original Idea` is byte-identical to the user's input. Mirrors `gen-plan`'s "draft is the most valuable human input" principle. +2. The draft is a complete, self-contained design — not a set of open questions. This is what lets it pass `gen-plan` Phase 2 and feed Phase 3 meaningfully. + +## Agent Topology + +``` +user idea (inline | file) + | + v +[Phase 2: Lead — generate N orthogonal directions] + | + +--> [Explore #1, direction A] --+ + +--> [Explore #2, direction B] --| + +--> [Explore #3, direction C] --|--> [Phase 4: Lead — synthesize] + +--> ... --| | + +--> [Explore #N, direction N] --+ v + draft.md +``` + +All parallel subagents are `Explore` (read-only). No new subagent type is introduced. + +## Error Handling + +- **Direction generation returns fewer than N**: retry the Phase 2 call once, asking for exactly N orthogonal directions. 
After the retry, if at least 2 directions are returned, proceed with the reduced count and log a warning; with fewer than 2, stop. +- **One Explore subagent fails**: drop it and continue synthesis with the rest. With fewer than 2 successful proposals, stop with error `exploration phase degraded; retry`. +- **No objective evidence for a direction**: subagent reports `exploratory, no concrete precedent`; that text is preserved verbatim in the draft. Never fabricate references. +- **Inline idea shorter than 10 characters**: warn and proceed (user's call — some valid ideas are terse). +- **Input file unreadable / not `.md`**: IO validation exits with distinct error code, parallel to `validate-gen-plan-io.sh` semantics. + +## Files to Add or Modify + +1. `commands/gen-idea.md` — new command spec (estimated ~150–200 lines, structured like a lean subset of `gen-plan.md`). +2. `prompt-template/idea/gen-idea-template.md` — new template file matching the format above. +3. `scripts/validate-gen-idea-io.sh` — new IO validation + slug resolution script, modeled on `validate-gen-plan-io.sh`. +4. `README.md` — add a one-line Quick Start entry for `gen-idea` above the `gen-plan` step; bump `Current Version` to `1.16.1`. +5. `.claude-plugin/plugin.json` — bump `version` to `1.16.1`. +6. `.claude-plugin/marketplace.json` — bump `version` to `1.16.1` (three-file version sync is a project-level rule). + +No test harness is added in this first pass. + +## Acceptance (Smoke-Level) + +- `/humanize:gen-idea "add undo/redo to the editor"` writes a `.md` under `.humanize/ideas/` with all required sections populated — one primary direction plus five alternatives (`N=6` total). +- `/humanize:gen-idea notes/rough.md --n 3 --output tmp/draft.md` reads the file, writes to `tmp/draft.md`, with exactly one primary and two alternatives. +- `/humanize:gen-idea ""` stops with a clear "missing idea" error. +- `/humanize:gen-idea "x" --n 1` stops with an out-of-range error. 
+- `/humanize:gen-idea "x" --output ` refuses to overwrite. +- The resulting draft fed into `/humanize:gen-plan --input --output plan.md` passes Phase 2 relevance check and produces a structured plan without human edits. + +## Future Extensions (Out of Scope Here) + +- Codex pass over the synthesized draft for independent sanity check. +- Optional `--chain-to-gen-plan` flag that invokes `gen-plan` on the written draft. +- `--directions "..."` override for users who want to pin angles. +- Config-loader integration for alternative-language draft variants. +- Relevance check mirroring `gen-plan` Phase 2 to fail early when an idea is clearly unrelated to the repo. From 84ffc93bceee23fe2d3c886e94bf47621b7381a8 Mon Sep 17 00:00:00 2001 From: shinan6 Date: Mon, 20 Apr 2026 12:39:45 +0800 Subject: [PATCH 83/97] docs: add gen-idea implementation plan Five-task plan covering the draft template, IO validation script, command spec, README/version bumps, and an end-to-end smoke check. Targets dev for the 1.16.1 release. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/superpowers/plans/2026-04-20-gen-idea.md | 904 ++++++++++++++++++ 1 file changed, 904 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-20-gen-idea.md diff --git a/docs/superpowers/plans/2026-04-20-gen-idea.md b/docs/superpowers/plans/2026-04-20-gen-idea.md new file mode 100644 index 00000000..140cda97 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-gen-idea.md @@ -0,0 +1,904 @@ +# gen-idea Command Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ship a `/humanize:gen-idea` slash command that takes a loose idea (inline text or `.md`) and writes a repo-grounded draft suitable as `/humanize:gen-plan`'s `--input`. 
+ +**Architecture:** Five-phase command executed by Claude inside a single `commands/gen-idea.md` spec. A bash validation script resolves I/O and slug; Phase 2 produces N orthogonal directions; Phase 3 fans out N parallel `Explore` subagents in a single Task-tool message; Phase 4 synthesizes one primary direction plus N−1 alternatives into a templated draft. + +**Tech Stack:** Claude Code plugin command markdown, bash (validation + slug generation), `Explore` subagents via the Task tool, `Write` tool for final draft. No Codex, no tests, no config-loader integration in this first pass. + +**Spec reference:** `docs/superpowers/specs/2026-04-20-gen-idea-design.md` + +**Scope rule:** The project convention (`.claude/CLAUDE.md`) requires every commit on a non-`main` branch to keep `version` in `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, and `README.md` above `main`'s version. Branch `add-gen-idea-command` currently inherits `1.16.0` from `origin/dev`, which is already above `main`'s `1.15.4`. Task 5 bumps all three files to `1.16.1` in a single commit that also ships the README Quick Start entry. + +--- + +## File Structure + +Files created or modified by this plan: + +- **Create** `prompt-template/idea/gen-idea-template.md` — static markdown skeleton Claude fills in Phase 4. Mirrors `prompt-template/plan/gen-plan-template.md` style. +- **Create** `scripts/validate-gen-idea-io.sh` — pure bash; parses args, detects inline vs file input, generates slug, resolves default output path, validates `--n` range and output writability, locates template. Modeled on `scripts/validate-gen-plan-io.sh`. +- **Create** `commands/gen-idea.md` — the slash-command spec. Frontmatter declares `allowed-tools`; body drives Claude through phases 0–4. Modeled on a lean subset of `commands/gen-plan.md`. +- **Modify** `README.md` — add one Quick Start bullet above the `gen-plan` entry; bump `Current Version` to `1.16.1`. 
+- **Modify** `.claude-plugin/plugin.json` — bump `version` to `1.16.1`. +- **Modify** `.claude-plugin/marketplace.json` — bump `plugins[0].version` to `1.16.1`. + +No new subagent type, no test harness, no hook changes. + +--- + +## Task 1: Create the Draft Template + +**Files:** +- Create: `prompt-template/idea/gen-idea-template.md` + +- [ ] **Step 1.1: Create the template directory and file** + +Run: +```bash +mkdir -p prompt-template/idea +``` + +Write `prompt-template/idea/gen-idea-template.md`: + +```markdown +# <TITLE> + +## Original Idea + +<ORIGINAL_IDEA> + +## Primary Direction: <PRIMARY_NAME> + +### Rationale + +<PRIMARY_RATIONALE> + +### Approach Summary + +<PRIMARY_APPROACH_SUMMARY> + +### Objective Evidence + +<PRIMARY_OBJECTIVE_EVIDENCE> + +### Known Risks + +<PRIMARY_KNOWN_RISKS> + +## Alternative Directions Considered + +<ALTERNATIVES> + +## Synthesis Notes + +<SYNTHESIS_NOTES> +``` + +Placeholders Claude replaces in Phase 4: +- `<TITLE>` — inferred 4–10 word Title Case title. +- `<ORIGINAL_IDEA>` — verbatim copy of user's inline text or `.md` contents. Byte-identical. +- `<PRIMARY_NAME>` — chosen direction's short name. +- `<PRIMARY_RATIONALE>` / `<PRIMARY_APPROACH_SUMMARY>` / `<PRIMARY_OBJECTIVE_EVIDENCE>` (bulleted) / `<PRIMARY_KNOWN_RISKS>` — populated from the chosen proposal. +- `<ALTERNATIVES>` — block containing Alt-1..Alt-(N−1) subsections, each with `### Alt-N: <name>`, `- Gist:`, `- Objective Evidence:` (bullets), `- Why not primary:`. +- `<SYNTHESIS_NOTES>` — one paragraph on which alt elements could fold into the primary. + +- [ ] **Step 1.2: Verify the file is written correctly** + +Run: +```bash +cat prompt-template/idea/gen-idea-template.md +``` + +Expected: the exact content above prints to stdout, no errors. 
+ +- [ ] **Step 1.3: Commit** + +Run: +```bash +git add prompt-template/idea/gen-idea-template.md +git commit -m "$(cat <<'EOF' +feat(gen-idea): add draft template for directed-swarm output + +Skeleton populated by the command's Phase 4 synthesis step. Placeholders +<TITLE>, <ORIGINAL_IDEA>, <PRIMARY_*>, <ALTERNATIVES>, <SYNTHESIS_NOTES> +are filled deterministically by the command body. + +Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> +EOF +)" +``` + +Expected: commit succeeds; `git log -1 --stat` shows the new file added. + +--- + +## Task 2: Create the IO Validation Script + +**Files:** +- Create: `scripts/validate-gen-idea-io.sh` + +- [ ] **Step 2.1: Write the script** + +Write `scripts/validate-gen-idea-io.sh`: + +```bash +#!/usr/bin/env bash +# validate-gen-idea-io.sh +# Validates input, slug, and output paths for the gen-idea command. +# Exit codes: +# 0 - Success +# 1 - Missing idea input or empty input file +# 2 - Input looks like a path but is unreadable, not .md, or does not exist +# 3 - Output parent directory does not exist (user-supplied path only) +# 4 - Output file already exists +# 5 - No write permission to output directory +# 6 - Invalid arguments (including --n out of range) +# 7 - Template file not found (plugin configuration error) + +set -e + +usage() { + echo "Usage: $0 <idea-text-or-path> [--n <int>] [--output <path>]" + echo "" + echo "Arguments:" + echo " <idea-text-or-path> Inline idea text OR path to an existing .md file (required)" + echo " --n Number of directions (default: 6; range: 2-10)" + echo " --output Output draft path (default: .humanize/ideas/<slug>-<timestamp>.md)" + echo " -h, --help Show this help message" + exit 6 +} + +IDEA_INPUT="" +N=6 +OUTPUT_FILE="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --n) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --n requires a value" + usage + fi + N="$2" + shift 2 + ;; + --output) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --output 
requires a value" + usage + fi + OUTPUT_FILE="$2" + shift 2 + ;; + -h|--help) + usage + ;; + --*) + echo "ERROR: Unknown option: $1" + usage + ;; + *) + if [[ -z "$IDEA_INPUT" ]]; then + IDEA_INPUT="$1" + shift + else + echo "ERROR: Unexpected positional argument: $1" + usage + fi + ;; + esac +done + +if [[ -z "$IDEA_INPUT" ]]; then + echo "VALIDATION_ERROR: MISSING_IDEA" + echo "No idea provided. Pass inline text or a .md file path as the first argument." + exit 1 +fi + +if ! [[ "$N" =~ ^[0-9]+$ ]]; then + echo "VALIDATION_ERROR: INVALID_N" + echo "--n must be a non-negative integer; got: $N" + exit 6 +fi +if (( N < 2 || N > 10 )); then + echo "VALIDATION_ERROR: N_OUT_OF_RANGE" + echo "--n must be between 2 and 10 inclusive; got: $N" + exit 6 +fi + +INPUT_MODE="" +IDEA_BODY_FILE="" +SLUG="" + +looks_like_path=false +if [[ "$IDEA_INPUT" == *.md || "$IDEA_INPUT" == */* ]]; then + looks_like_path=true +fi + +if [[ -f "$IDEA_INPUT" ]]; then + if [[ "$IDEA_INPUT" != *.md ]]; then + echo "VALIDATION_ERROR: INPUT_NOT_MD" + echo "File input must have .md extension; got: $IDEA_INPUT" + exit 2 + fi + if [[ ! 
-s "$IDEA_INPUT" ]]; then + echo "VALIDATION_ERROR: INPUT_EMPTY" + echo "Input file is empty: $IDEA_INPUT" + exit 1 + fi + INPUT_MODE="file" + IDEA_BODY_FILE="$(realpath "$IDEA_INPUT")" + base="$(basename "$IDEA_INPUT")" + SLUG="${base%.md}" +elif [[ "$looks_like_path" == true ]]; then + echo "VALIDATION_ERROR: INPUT_NOT_FOUND" + echo "Looks like a file path but does not exist: $IDEA_INPUT" + exit 2 +else + INPUT_MODE="inline" + TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX.md")" + printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" + IDEA_BODY_FILE="$TMPFILE" + slug_raw="$(printf '%s' "$IDEA_INPUT" | head -c 40 | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9-]+/-/g' | sed -E 's/-+/-/g' | sed -E 's/^-+//; s/-+$//')" + if [[ -z "$slug_raw" ]]; then + slug_raw="idea" + fi + SLUG="$slug_raw" +fi + +PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" + +DEFAULT_OUTPUT=false +if [[ -z "$OUTPUT_FILE" ]]; then + TIMESTAMP="$(date +%Y%m%d-%H%M%S)" + OUTPUT_FILE="$PROJECT_ROOT/.humanize/ideas/${SLUG}-${TIMESTAMP}.md" + DEFAULT_OUTPUT=true +fi + +OUTPUT_FILE="$(realpath -m "$OUTPUT_FILE" 2>/dev/null || echo "$OUTPUT_FILE")" +OUTPUT_DIR="$(dirname "$OUTPUT_FILE")" + +if [[ "$DEFAULT_OUTPUT" == true ]]; then + mkdir -p "$OUTPUT_DIR" 2>/dev/null || true +fi + +if [[ ! -d "$OUTPUT_DIR" ]]; then + echo "VALIDATION_ERROR: OUTPUT_DIR_NOT_FOUND" + echo "Output directory does not exist: $OUTPUT_DIR" + exit 3 +fi + +if [[ -e "$OUTPUT_FILE" ]]; then + echo "VALIDATION_ERROR: OUTPUT_EXISTS" + echo "Output already exists: $OUTPUT_FILE" + exit 4 +fi + +if [[ ! -w "$OUTPUT_DIR" ]]; then + echo "VALIDATION_ERROR: NO_WRITE_PERMISSION" + echo "No write permission: $OUTPUT_DIR" + exit 5 +fi + +if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then + TEMPLATE_FILE="$CLAUDE_PLUGIN_ROOT/prompt-template/idea/gen-idea-template.md" +else + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + TEMPLATE_FILE="$SCRIPT_DIR/../prompt-template/idea/gen-idea-template.md" +fi + +if [[ ! 
-f "$TEMPLATE_FILE" ]]; then + echo "VALIDATION_ERROR: TEMPLATE_NOT_FOUND" + echo "Template file missing: $TEMPLATE_FILE" + exit 7 +fi + +echo "VALIDATION_SUCCESS" +echo "INPUT_MODE: $INPUT_MODE" +echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" +echo "OUTPUT_FILE: $OUTPUT_FILE" +echo "SLUG: $SLUG" +echo "TEMPLATE_FILE: $TEMPLATE_FILE" +echo "N: $N" +exit 0 +``` + +- [ ] **Step 2.2: Make the script executable** + +Run: +```bash +chmod +x scripts/validate-gen-idea-io.sh +ls -l scripts/validate-gen-idea-io.sh +``` + +Expected: permissions include `x` (e.g. `-rwxr-xr-x`). + +- [ ] **Step 2.3: Smoke-test happy path (inline input)** + +Run: +```bash +scripts/validate-gen-idea-io.sh "add undo/redo to the editor" --n 4 +``` + +Expected stdout contains: +``` +VALIDATION_SUCCESS +INPUT_MODE: inline +IDEA_BODY_FILE: /tmp/... (or $TMPDIR/...) +OUTPUT_FILE: .../.humanize/ideas/add-undo-redo-to-the-editor-<timestamp>.md +SLUG: add-undo-redo-to-the-editor +TEMPLATE_FILE: .../prompt-template/idea/gen-idea-template.md +N: 4 +``` +Exit code: `0`. + +If `.humanize/ideas/` was created under project root, remove it with `rmdir .humanize/ideas .humanize 2>/dev/null || true` to keep the tree clean between iterations. + +- [ ] **Step 2.4: Smoke-test happy path (file input)** + +Run: +```bash +mkdir -p /tmp/gen-idea-test +printf 'add hotkey support\n\nwith modifier chords\n' > /tmp/gen-idea-test/notes.md +scripts/validate-gen-idea-io.sh /tmp/gen-idea-test/notes.md +``` + +Expected: `INPUT_MODE: file`, `SLUG: notes`, `IDEA_BODY_FILE` equals the realpath of `/tmp/gen-idea-test/notes.md`, exit `0`. + +- [ ] **Step 2.5: Smoke-test error paths** + +Each of these must produce the noted exit code: + +```bash +# Missing idea +scripts/validate-gen-idea-io.sh ; echo "exit=$?" +# Expect: exit=1, MISSING_IDEA + +# --n out of range +scripts/validate-gen-idea-io.sh "x" --n 1 ; echo "exit=$?" +# Expect: exit=6, N_OUT_OF_RANGE + +scripts/validate-gen-idea-io.sh "x" --n 99 ; echo "exit=$?" 
+# Expect: exit=6, N_OUT_OF_RANGE + +# --n non-integer +scripts/validate-gen-idea-io.sh "x" --n abc ; echo "exit=$?" +# Expect: exit=6, INVALID_N + +# Path that looks like file but missing +scripts/validate-gen-idea-io.sh path/that/does/not/exist.md ; echo "exit=$?" +# Expect: exit=2, INPUT_NOT_FOUND + +# Non-.md file +touch /tmp/gen-idea-test/notes.txt +scripts/validate-gen-idea-io.sh /tmp/gen-idea-test/notes.txt ; echo "exit=$?" +# Expect: exit=2, INPUT_NOT_MD + +# Empty .md +printf '' > /tmp/gen-idea-test/empty.md +scripts/validate-gen-idea-io.sh /tmp/gen-idea-test/empty.md ; echo "exit=$?" +# Expect: exit=1, INPUT_EMPTY + +# Output already exists +touch /tmp/gen-idea-test/out.md +scripts/validate-gen-idea-io.sh "x" --output /tmp/gen-idea-test/out.md ; echo "exit=$?" +# Expect: exit=4, OUTPUT_EXISTS + +# Output parent dir missing (user-supplied) +scripts/validate-gen-idea-io.sh "x" --output /tmp/gen-idea-no-such-dir/out.md ; echo "exit=$?" +# Expect: exit=3, OUTPUT_DIR_NOT_FOUND +``` + +Cleanup: +```bash +rm -rf /tmp/gen-idea-test +rm -rf "$(pwd)/.humanize" 2>/dev/null || true +``` + +If any exit code differs, edit the script to match the expected codes and re-run the failing case before moving on. + +- [ ] **Step 2.6: Commit** + +Run: +```bash +git add scripts/validate-gen-idea-io.sh +git commit -m "$(cat <<'EOF' +feat(gen-idea): add IO validation script with slug resolution + +Detects inline vs .md file input, writes inline text to a tempfile so +downstream phases always consume a file, derives a URL-safe slug, and +resolves the default output path under .humanize/ideas/. Exit codes +parallel validate-gen-plan-io.sh. + +Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> +EOF +)" +``` + +Expected: commit succeeds. 
+ +--- + +## Task 3: Create the Command Spec + +**Files:** +- Create: `commands/gen-idea.md` + +- [ ] **Step 3.1: Write the command spec** + +Write `commands/gen-idea.md`: + +````markdown +--- +description: "Generate a repo-grounded idea draft via directed-swarm exploration" +argument-hint: "<idea-text-or-path> [--n <int>] [--output <path>]" +allowed-tools: + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh:*)" + - "Read" + - "Glob" + - "Grep" + - "Task" + - "Write" +--- + +# Generate Idea Draft from Loose Input + +Read and execute below with ultrathink. + +## Hard Constraint: Draft-Only Output + +This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4. All exploration subagents run read-only. + +This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop each, the lead synthesizes a draft with one primary direction plus N-1 alternatives. Each direction carries objective evidence from the repo. + +## Workflow Overview + +> Sequential Execution Constraint: All phases MUST execute strictly in order. Each phase fully completes before the next. + +1. Parse Input +2. IO Validation +3. Direction Generation +4. Parallel Exploration +5. Synthesis and Write + +--- + +## Phase 0: Parse Input + +Extract from `$ARGUMENTS`: +- First positional: inline idea text or path to a `.md` file (required). +- `--n <int>`: number of directions. Default 6. +- `--output <path>`: target draft path. Default resolved by the validation script. + +Do not interpret or rewrite the idea text here. Pass `$ARGUMENTS` through to Phase 1 unchanged. 
+ +--- + +## Phase 1: IO Validation + +Run: +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh" $ARGUMENTS +``` + +Handle exit codes: +- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. +- `1`: Report "Missing or empty idea input" and stop. +- `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. +- `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. +- `4`: Report "Output file already exists — choose a different path" and stop. +- `5`: Report "No write permission to output directory" and stop. +- `6`: Report "Invalid arguments" with the stdout usage text and stop. +- `7`: Report "Template file missing — plugin configuration error" and stop. + +Read the full contents of `IDEA_BODY_FILE` using the `Read` tool. Preserve byte-identical content in memory for later phases. + +--- + +## Phase 2: Direction Generation + +Generate exactly `N` orthogonal directions for exploring the idea. + +### Context to Gather + +Before generating directions, read: +- `README.md` at the project root. +- `CLAUDE.md` at the project root (if it exists). +- `.claude/CLAUDE.md` (if it exists). +- Top-level directory listing via `Glob` with pattern `*` (one level, no recursion). + +This context grounds the directions in the actual repo rather than generic brainstorming. + +### Generation Rules + +Produce exactly `N` direction entries. Each entry has: +- `name`: a 2-5 word short label. +- `rationale`: a single sentence explaining why this angle is distinct from the other directions. + +Hard constraint: **orthogonality**. Two near-duplicate directions defeat the directed-diversity premise. Before returning: +- If two directions feel like dupes, replace one with a genuinely different angle. 
+- If a direction collapses to "just do X better" with no angle distinction, replace it. +- Do not emit directions that merely restate the idea in different words. + +### Retry and Degradation + +- If the first pass returns fewer than `N` entries, regenerate once with an explicit "you MUST produce `N` orthogonal directions" instruction. +- If the second pass still returns fewer than `N` but at least 2, proceed with the reduced count and emit a warning to the user: `Warning: direction generation returned <count> of <N> requested directions; proceeding with reduced count.` +- If fewer than 2 directions are produced, stop with error: `direction generation degraded; retry.` + +Store the final direction list as `DIRECTIONS` (ordered; index 0..len-1). + +--- + +## Phase 3: Parallel Exploration + +Dispatch all directions in a **single Task-tool message** containing one Task invocation per direction. This is the W2S parallel-swarm step. + +### Subagent Invocation + +For each direction in `DIRECTIONS`, launch one `Explore` subagent. Each invocation prompt MUST include: + +1. A verbatim copy of the idea body loaded from `IDEA_BODY_FILE` in Phase 1. +2. The assigned direction (name + rationale). +3. The following instruction block (reproduce verbatim in the subagent prompt): + +> Explore this direction within the current repo. Gather OBJECTIVE EVIDENCE: +> - Specific repo paths with existing patterns worth extending. +> - Prior art or precedent in the codebase or adjacent tooling. +> - Measurable considerations (approximate complexity, LOC surface, performance implications) where discoverable from reading the code. +> +> Read-only. Do not write any files. +> +> If no concrete evidence exists for this direction, report the literal string `exploratory, no concrete precedent` once in OBJECTIVE_EVIDENCE and stop exploring further. Fabrication of references is forbidden. 
+> +> Return a structured proposal with exactly these fields: +> - `APPROACH_SUMMARY`: concrete design description (what to build, core mechanism, affected components). +> - `OBJECTIVE_EVIDENCE`: bullet list of repo paths, prior art, or the `exploratory, no concrete precedent` sentinel. +> - `KNOWN_RISKS`: short bullet list. +> - `CONFIDENCE`: one of `high`, `medium`, `low`. + +### Collection and Degradation + +Collect all subagent responses. For each response: +- Parse the four required fields. If a field is missing, mark that proposal as degraded and drop it. +- If fewer than 2 proposals survive, stop with error: `exploration phase degraded; retry.` +- Otherwise continue with the surviving proposals. + +Associate each surviving proposal with its originating direction index, preserving the original ordering for Alt-N numbering in Phase 4. + +--- + +## Phase 4: Synthesis and Write + +### Step 4.1: Pick the Primary Direction + +Review all surviving proposals. Choose the strongest as the primary based on: +1. Evidence density — more concrete repo references outranks fewer. +2. Fit with existing repo patterns — extending patterns outranks introducing unfamiliar paradigms. +3. Implementation surface area — prefer smaller surface where quality is otherwise comparable. +4. Declared `CONFIDENCE` — `high` > `medium` > `low` as tiebreaker. + +Record the chosen direction as `PRIMARY`; the remaining directions become the Alt-1..Alt-(N-1) list in their original ordering. + +### Step 4.2: Infer Title + +Generate a 4-10 word Title Case title that captures the primary direction, not the original input phrasing verbatim. Example: idea `add undo/redo` with primary direction `command-pattern history` yields title `Command-Pattern Undo Stack For The Editor`. + +### Step 4.3: Populate the Template + +Read the template file located at `TEMPLATE_FILE` (from Phase 1 stdout). + +Produce the finalized draft content in memory by replacing placeholders: +- `<TITLE>` — the inferred title. 
+- `<ORIGINAL_IDEA>` — byte-identical contents of `IDEA_BODY_FILE`. Preserve line breaks, trailing newline, and all formatting. Do NOT paraphrase or re-indent. +- `<PRIMARY_NAME>` — primary direction's short name. +- `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). +- `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. +- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. +- `<PRIMARY_KNOWN_RISKS>` — primary proposal's `KNOWN_RISKS`, rendered as a bullet list. +- `<ALTERNATIVES>` — for each remaining direction at index `i` (1-based in user-facing output), emit: + + ```markdown + ### Alt-<i>: <name> + - Gist: <one-paragraph summary derived from APPROACH_SUMMARY> + - Objective Evidence: + - <bullet from OBJECTIVE_EVIDENCE> + - ... + - Why not primary: <one sentence stating the tradeoff vs PRIMARY> + ``` + + Separate consecutive Alt entries with a single blank line. + +- `<SYNTHESIS_NOTES>` — one paragraph describing which elements from the alternatives could fold into the primary if the user chose a different direction. This is the lead's own synthesis note, not a subagent output. + +### Step 4.4: Write the Draft File + +Write the finalized content to `OUTPUT_FILE` using the `Write` tool. Single write; no progressive edits. + +### Step 4.5: Report + +Report to the user: +- Path written (`OUTPUT_FILE`). +- Primary direction name. +- Requested `N` and the actual direction count (note if reduced due to degradation). +- Next-step hint: `To turn this draft into a plan, run: /humanize:gen-plan --input <OUTPUT_FILE> --output <plan-path>`. + +--- + +## Error Handling + +- Phase 1 validation errors stop the command with a clear message. No partial output. +- Phase 2 degradation follows the retry-once + ≥2 minimum rule stated above. +- Phase 3 degradation follows the drop-and-continue + ≥2 minimum rule stated above. +- Never fabricate repo references or prior art. 
The `exploratory, no concrete precedent` sentinel from subagents is preserved verbatim in the draft. +- If any phase stops with an error, do not write a partial `OUTPUT_FILE`. +```` + +Note on the four-backtick fence (````): the command file itself contains triple-backtick fenced code blocks, so the outer fence in this plan uses four backticks to avoid collision. + +- [ ] **Step 3.2: Verify the command spec parses as markdown** + +Run: +```bash +head -20 commands/gen-idea.md +``` + +Expected: the frontmatter block (lines 1-11) appears first, then the `# Generate Idea Draft from Loose Input` heading. No stray backticks or broken YAML. + +Also check the frontmatter is well-formed: +```bash +awk '/^---$/{n++; if(n==2) exit} n==1' commands/gen-idea.md +``` + +Expected: the opening `---` line followed by the YAML keys (`description`, `argument-hint`, `allowed-tools`) with valid values. + +- [ ] **Step 3.3: Commit** + +Run: +```bash +git add commands/gen-idea.md +git commit -m "$(cat <<'EOF' +feat(gen-idea): add slash-command spec for directed-swarm drafting + +Five-phase command: parse, validate via scripts/validate-gen-idea-io.sh, +generate N orthogonal directions grounded in repo context, fan out N +parallel Explore subagents in a single Task-tool message, synthesize +one primary plus N-1 alternatives into the new template, write the +draft. No Codex, no tests, no config-loader integration in this pass. + +Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> +EOF +)" +``` + +Expected: commit succeeds. + +--- + +## Task 4: Update README and Bump Versions + +**Files:** +- Modify: `README.md` +- Modify: `.claude-plugin/plugin.json` +- Modify: `.claude-plugin/marketplace.json` + +- [ ] **Step 4.1: Bump `.claude-plugin/plugin.json`** + +Open the file and change the `version` field from `"1.16.0"` to `"1.16.1"`. The full resulting file: + +```json +{ + "name": "humanize", + "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", + "version": "1.16.1", + "author": { + "name": "PolyArch" + }, + "repository": "https://github.com/PolyArch/humanize", + "homepage": "https://github.com/PolyArch/humanize#readme", + "license": "MIT", + "keywords": [ + "iterative-development", + "codex-review", + "ai-native", + "feedback-loop", + "goal-tracking", + "quality-assurance" + ] +} +``` + +Verify: +```bash +grep '"version"' .claude-plugin/plugin.json +``` +Expected: ` "version": "1.16.1",` + +- [ ] **Step 4.2: Bump `.claude-plugin/marketplace.json`** + +Change `plugins[0].version` from `"1.16.0"` to `"1.16.1"`. Full resulting file: + +```json +{ + "name": "PolyArch", + "owner": { + "name": "PolyArch" + }, + "plugins": [ + { + "name": "humanize", + "source": "./", + "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", + "version": "1.16.1" + } + ] +} +``` + +Verify: +```bash +grep '"version"' .claude-plugin/marketplace.json +``` +Expected: ` "version": "1.16.1"` + +- [ ] **Step 4.3: Update `README.md`** + +Make two edits: + +**Edit 1** — bump the version header. Change line: +``` +**Current Version: 1.16.0** +``` +to: +``` +**Current Version: 1.16.1** +``` + +**Edit 2** — insert a new Quick Start step for `gen-idea` BEFORE the existing `gen-plan` step. + +Locate the existing Quick Start section. It currently begins: +``` +## Quick Start + +1. **Generate a plan** from your draft: + ```bash + /humanize:gen-plan --input draft.md --output docs/plan.md + ``` +``` + +Replace that block with: +``` +## Quick Start + +1. 
**Generate an idea draft** from a loose thought (optional — skip if you already have a draft): + ```bash + /humanize:gen-idea "add undo/redo to the editor" + ``` + Output goes to `.humanize/ideas/<slug>-<timestamp>.md` by default. Pass a `.md` path to expand existing rough notes. `--n` controls how many parallel directions explore the idea (default 6). + +2. **Generate a plan** from your draft: + ```bash + /humanize:gen-plan --input draft.md --output docs/plan.md + ``` +``` + +Renumber the remaining Quick Start items in that section accordingly (the existing `refine-plan`, `start-rlcr-loop`, `gemini` steps shift down by one). + +Verify: +```bash +grep -n "Current Version" README.md +grep -n "gen-idea" README.md +``` +Expected: the version line now reads `1.16.1`; the `gen-idea` mention appears in Quick Start. + +- [ ] **Step 4.4: Run a quick project rule sanity check** + +Run: +```bash +grep '"version"' .claude-plugin/plugin.json .claude-plugin/marketplace.json +grep 'Current Version' README.md +``` + +All three must show `1.16.1`. The three-file version sync is a project-level rule enforced by CI; mismatches will block the PR. + +- [ ] **Step 4.5: Commit** + +Run: +```bash +git add README.md .claude-plugin/plugin.json .claude-plugin/marketplace.json +git commit -m "$(cat <<'EOF' +feat(gen-idea): document command in README, bump to 1.16.1 + +Add a Quick Start entry for /humanize:gen-idea above the existing +gen-plan step and sync version across plugin.json, marketplace.json, +and the README header. + +Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> +EOF +)" +``` + +Expected: commit succeeds; `git log --oneline -5` shows four feat commits for gen-idea (template, script, command, docs). + +--- + +## Task 5: End-to-End Smoke Check + +**Files:** no new files. This task verifies the full command integrates and produces a valid draft. + +This task requires the validation script and command spec from Tasks 2–3 to be present and committed. 
It does NOT invoke Claude via a nested agent — it walks the user through a manual smoke test they (or an implementing engineer) run interactively in Claude Code to confirm the command wires up end-to-end. + +- [ ] **Step 5.1: Invoke the command with an inline idea** + +In a Claude Code session with the plugin installed from this branch, run: +``` +/humanize:gen-idea "add bulk-tag operations to the note editor" --n 3 +``` + +Expected: +- Claude reports running `validate-gen-idea-io.sh` and parses `INPUT_MODE: inline`. +- Claude produces exactly 3 orthogonal directions (or emits the documented warning and proceeds with ≥2). +- Claude dispatches Explore subagents in parallel. +- Claude writes a draft to `.humanize/ideas/add-bulk-tag-operations-to-the-note-editor-<timestamp>.md`. +- Final report names the primary direction and the suggested `gen-plan` follow-up command. + +- [ ] **Step 5.2: Inspect the generated draft** + +Run: +```bash +ls -la .humanize/ideas/ +cat .humanize/ideas/add-bulk-tag-operations-to-the-note-editor-*.md +``` + +Expected: exactly one new file. The content has these sections in order: +- `# <Title>` +- `## Original Idea` — contains the exact inline text `add bulk-tag operations to the note editor` (verbatim). +- `## Primary Direction: <Name>` with Rationale, Approach Summary, Objective Evidence, Known Risks subsections populated. +- `## Alternative Directions Considered` with `### Alt-1: ...` and `### Alt-2: ...` subsections (since N=3 → 1 primary + 2 alts). +- `## Synthesis Notes` — one paragraph. + +No placeholder strings like `<TITLE>` or `<PRIMARY_NAME>` remain in the output. 
+ +- [ ] **Step 5.3: Chain into gen-plan to confirm downstream compatibility** + +Run: +``` +/humanize:gen-plan --input .humanize/ideas/add-bulk-tag-operations-to-the-note-editor-<timestamp>.md --output /tmp/gen-idea-smoke-plan.md --direct +``` + +(Use `--direct` to skip the convergence loop for this smoke check; the goal is only to confirm the draft passes `gen-plan` Phase 2 relevance and produces a plan skeleton.) + +Expected: +- gen-plan's relevance check passes. +- A plan file appears at `/tmp/gen-idea-smoke-plan.md` with the standard gen-plan structure (Goal Description, Acceptance Criteria, Path Boundaries, etc.). + +- [ ] **Step 5.4: Cleanup and record findings** + +Run: +```bash +rm -rf .humanize/ideas +rm -f /tmp/gen-idea-smoke-plan.md +``` + +If any step produced unexpected output (wrong exit code, missing section, fabricated evidence), add a follow-up task noting what to fix and which file to edit. Otherwise this plan is complete. + +- [ ] **Step 5.5: Final push** + +Once Tasks 1–4 are committed and Task 5 smoke-passes, push the branch: +```bash +git push -u origin add-gen-idea-command +``` + +Open a PR targeting `dev` (not `main`) — the branch was cut from `origin/dev` and is intended for the `1.16.x` release train. The project's CI check enforces that `feat:` branches target `dev`, not `main`. + +--- + +## Self-Review Notes (Completed During Plan Authoring) + +- **Spec coverage**: every Out-of-Scope-first-pass item in the spec is reflected here as "not done" (no Codex tasks, no config-loader tasks, no test harness). Every in-scope file listed in the spec has a dedicated task. +- **Placeholder scan**: no TBD/TODO/"handle errors appropriately" remain. Every code block shows exact content. +- **Type consistency**: `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N`, `INPUT_MODE` names match across the validation script, the command spec, and the smoke-check expectations. 
Exit codes are consistent between the script definition, the command error-handling table, and the smoke-test expected values. +- **Scope**: single subsystem (one command + template + script + doc bump). No decomposition needed. From e18d01378e11622d456d1774416c55af32213417 Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 12:46:20 +0800 Subject: [PATCH 84/97] feat(gen-idea): add draft template for directed-swarm output Skeleton populated by the command's Phase 4 synthesis step. Placeholders <TITLE>, <ORIGINAL_IDEA>, <PRIMARY_*>, <ALTERNATIVES>, <SYNTHESIS_NOTES> are filled deterministically by the command body. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- prompt-template/idea/gen-idea-template.md | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 prompt-template/idea/gen-idea-template.md diff --git a/prompt-template/idea/gen-idea-template.md b/prompt-template/idea/gen-idea-template.md new file mode 100644 index 00000000..5feeb8a3 --- /dev/null +++ b/prompt-template/idea/gen-idea-template.md @@ -0,0 +1,31 @@ +# <TITLE> + +## Original Idea + +<ORIGINAL_IDEA> + +## Primary Direction: <PRIMARY_NAME> + +### Rationale + +<PRIMARY_RATIONALE> + +### Approach Summary + +<PRIMARY_APPROACH_SUMMARY> + +### Objective Evidence + +<PRIMARY_OBJECTIVE_EVIDENCE> + +### Known Risks + +<PRIMARY_KNOWN_RISKS> + +## Alternative Directions Considered + +<ALTERNATIVES> + +## Synthesis Notes + +<SYNTHESIS_NOTES> From e3e38de4224b3a0816d722b35bdc118e3dfef2c1 Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 12:52:59 +0800 Subject: [PATCH 85/97] feat(gen-idea): add IO validation script with slug resolution Detects inline vs .md file input, writes inline text to a tempfile so downstream phases always consume a file, derives a URL-safe slug, and resolves the default output path under .humanize/ideas/. Exit codes parallel validate-gen-plan-io.sh. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- scripts/validate-gen-idea-io.sh | 181 ++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100755 scripts/validate-gen-idea-io.sh diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh new file mode 100755 index 00000000..e6d8d0c4 --- /dev/null +++ b/scripts/validate-gen-idea-io.sh @@ -0,0 +1,181 @@ +#!/usr/bin/env bash +# validate-gen-idea-io.sh +# Validates input, slug, and output paths for the gen-idea command. +# Exit codes: +# 0 - Success +# 1 - Missing idea input or empty input file +# 2 - Input looks like a path but is unreadable, not .md, or does not exist +# 3 - Output parent directory does not exist (user-supplied path only) +# 4 - Output file already exists +# 5 - No write permission to output directory +# 6 - Invalid arguments (including --n out of range) +# 7 - Template file not found (plugin configuration error) + +set -e + +usage() { + echo "Usage: $0 <idea-text-or-path> [--n <int>] [--output <path>]" + echo "" + echo "Arguments:" + echo " <idea-text-or-path> Inline idea text OR path to an existing .md file (required)" + echo " --n Number of directions (default: 6; range: 2-10)" + echo " --output Output draft path (default: .humanize/ideas/<slug>-<timestamp>.md)" + echo " -h, --help Show this help message" + exit 6 +} + +IDEA_INPUT="" +N=6 +OUTPUT_FILE="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --n) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --n requires a value" + usage + fi + N="$2" + shift 2 + ;; + --output) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --output requires a value" + usage + fi + OUTPUT_FILE="$2" + shift 2 + ;; + -h|--help) + usage + ;; + --*) + echo "ERROR: Unknown option: $1" + usage + ;; + *) + if [[ -z "$IDEA_INPUT" ]]; then + IDEA_INPUT="$1" + shift + else + echo "ERROR: Unexpected positional argument: $1" + usage + fi + ;; + esac +done + +if [[ -z "$IDEA_INPUT" ]]; then + echo 
"VALIDATION_ERROR: MISSING_IDEA" + echo "No idea provided. Pass inline text or a .md file path as the first argument." + exit 1 +fi + +if ! [[ "$N" =~ ^[0-9]+$ ]]; then + echo "VALIDATION_ERROR: INVALID_N" + echo "--n must be a non-negative integer; got: $N" + exit 6 +fi +if (( N < 2 || N > 10 )); then + echo "VALIDATION_ERROR: N_OUT_OF_RANGE" + echo "--n must be between 2 and 10 inclusive; got: $N" + exit 6 +fi + +INPUT_MODE="" +IDEA_BODY_FILE="" +SLUG="" + +looks_like_path=false +if [[ "$IDEA_INPUT" == *.md ]]; then + looks_like_path=true +elif [[ "$IDEA_INPUT" == */* && "$IDEA_INPUT" != *" "* ]]; then + looks_like_path=true +fi + +if [[ -f "$IDEA_INPUT" ]]; then + if [[ "$IDEA_INPUT" != *.md ]]; then + echo "VALIDATION_ERROR: INPUT_NOT_MD" + echo "File input must have .md extension; got: $IDEA_INPUT" + exit 2 + fi + if [[ ! -s "$IDEA_INPUT" ]]; then + echo "VALIDATION_ERROR: INPUT_EMPTY" + echo "Input file is empty: $IDEA_INPUT" + exit 1 + fi + INPUT_MODE="file" + IDEA_BODY_FILE="$(realpath "$IDEA_INPUT")" + base="$(basename "$IDEA_INPUT")" + SLUG="${base%.md}" +elif [[ "$looks_like_path" == true ]]; then + echo "VALIDATION_ERROR: INPUT_NOT_FOUND" + echo "Looks like a file path but does not exist: $IDEA_INPUT" + exit 2 +else + INPUT_MODE="inline" + TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" + printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" + IDEA_BODY_FILE="$TMPFILE" + slug_raw="$(printf '%s' "$IDEA_INPUT" | head -c 40 | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9-]+/-/g' | sed -E 's/-+/-/g' | sed -E 's/^-+//; s/-+$//')" + if [[ -z "$slug_raw" ]]; then + slug_raw="idea" + fi + SLUG="$slug_raw" +fi + +PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" + +DEFAULT_OUTPUT=false +if [[ -z "$OUTPUT_FILE" ]]; then + TIMESTAMP="$(date +%Y%m%d-%H%M%S)" + OUTPUT_FILE="$PROJECT_ROOT/.humanize/ideas/${SLUG}-${TIMESTAMP}.md" + DEFAULT_OUTPUT=true +fi + +OUTPUT_FILE="$(realpath -m "$OUTPUT_FILE" 2>/dev/null || echo "$OUTPUT_FILE")" 
+OUTPUT_DIR="$(dirname "$OUTPUT_FILE")" + +if [[ "$DEFAULT_OUTPUT" == true ]]; then + mkdir -p "$OUTPUT_DIR" 2>/dev/null || true +fi + +if [[ ! -d "$OUTPUT_DIR" ]]; then + echo "VALIDATION_ERROR: OUTPUT_DIR_NOT_FOUND" + echo "Output directory does not exist: $OUTPUT_DIR" + exit 3 +fi + +if [[ -e "$OUTPUT_FILE" ]]; then + echo "VALIDATION_ERROR: OUTPUT_EXISTS" + echo "Output already exists: $OUTPUT_FILE" + exit 4 +fi + +if [[ ! -w "$OUTPUT_DIR" ]]; then + echo "VALIDATION_ERROR: NO_WRITE_PERMISSION" + echo "No write permission: $OUTPUT_DIR" + exit 5 +fi + +if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then + TEMPLATE_FILE="$CLAUDE_PLUGIN_ROOT/prompt-template/idea/gen-idea-template.md" +else + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + TEMPLATE_FILE="$SCRIPT_DIR/../prompt-template/idea/gen-idea-template.md" +fi + +if [[ ! -f "$TEMPLATE_FILE" ]]; then + echo "VALIDATION_ERROR: TEMPLATE_NOT_FOUND" + echo "Template file missing: $TEMPLATE_FILE" + exit 7 +fi + +echo "VALIDATION_SUCCESS" +echo "INPUT_MODE: $INPUT_MODE" +echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" +echo "OUTPUT_FILE: $OUTPUT_FILE" +echo "SLUG: $SLUG" +echo "TEMPLATE_FILE: $TEMPLATE_FILE" +echo "N: $N" +exit 0 From 911c5da5f4448e84fbd2a75be4647215551aaa35 Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 12:54:00 +0800 Subject: [PATCH 86/97] docs(plan): align Task 2 script with implemented fixes Two corrections required to pass macOS smoke tests: - Path heuristic now treats slashes as path-indicating only when no whitespace is present, so inline ideas like "add undo/redo" are not misread as paths. - mktemp template drops the .md suffix (BSD mktemp does not support a suffix after the X template). 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- docs/superpowers/plans/2026-04-20-gen-idea.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/plans/2026-04-20-gen-idea.md b/docs/superpowers/plans/2026-04-20-gen-idea.md index 140cda97..380f93a8 100644 --- a/docs/superpowers/plans/2026-04-20-gen-idea.md +++ b/docs/superpowers/plans/2026-04-20-gen-idea.md @@ -214,7 +214,9 @@ IDEA_BODY_FILE="" SLUG="" looks_like_path=false -if [[ "$IDEA_INPUT" == *.md || "$IDEA_INPUT" == */* ]]; then +if [[ "$IDEA_INPUT" == *.md ]]; then + looks_like_path=true +elif [[ "$IDEA_INPUT" == */* && "$IDEA_INPUT" != *" "* ]]; then looks_like_path=true fi @@ -239,7 +241,7 @@ elif [[ "$looks_like_path" == true ]]; then exit 2 else INPUT_MODE="inline" - TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX.md")" + TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" IDEA_BODY_FILE="$TMPFILE" slug_raw="$(printf '%s' "$IDEA_INPUT" | head -c 40 | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9-]+/-/g' | sed -E 's/-+/-/g' | sed -E 's/^-+//; s/-+$//')" From 7e6442bc65b115f4592f9387bf0d5b3136a3d586 Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 13:00:14 +0800 Subject: [PATCH 87/97] fix(gen-idea): harden realpath fallback and document tempfile contract Code-quality review surfaced three items on the IO validation script: - Guard `realpath $IDEA_INPUT` with a fallback so older macOS does not trip `set -e` on the file-input branch. - Comment explaining the caller owns tempfile cleanup (no trap) so a future maintainer does not silently break the caller. - Comment clarifying the path-vs-inline heuristic and its limitation (a non-existent path with spaces falls through to inline mode). The plan's literal script copy is updated to match. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- docs/superpowers/plans/2026-04-20-gen-idea.md | 12 +++++++++++- scripts/validate-gen-idea-io.sh | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/plans/2026-04-20-gen-idea.md b/docs/superpowers/plans/2026-04-20-gen-idea.md index 380f93a8..70af9d5a 100644 --- a/docs/superpowers/plans/2026-04-20-gen-idea.md +++ b/docs/superpowers/plans/2026-04-20-gen-idea.md @@ -213,6 +213,12 @@ INPUT_MODE="" IDEA_BODY_FILE="" SLUG="" +# Detect whether IDEA_INPUT is meant as a file path. The `-f` test further +# below is the primary gate; this heuristic only matters when that test fails +# and we must decide whether to emit INPUT_NOT_FOUND (user meant a path) or +# treat the text as inline. Limitation: a path that contains spaces AND does +# not exist falls through to inline mode silently, because the space rule is +# how we avoid misclassifying inline ideas that happen to contain "/". looks_like_path=false if [[ "$IDEA_INPUT" == *.md ]]; then looks_like_path=true @@ -232,7 +238,7 @@ if [[ -f "$IDEA_INPUT" ]]; then exit 1 fi INPUT_MODE="file" - IDEA_BODY_FILE="$(realpath "$IDEA_INPUT")" + IDEA_BODY_FILE="$(realpath "$IDEA_INPUT" 2>/dev/null || echo "$IDEA_INPUT")" base="$(basename "$IDEA_INPUT")" SLUG="${base%.md}" elif [[ "$looks_like_path" == true ]]; then @@ -241,6 +247,10 @@ elif [[ "$looks_like_path" == true ]]; then exit 2 else INPUT_MODE="inline" + # Deliberately no `trap ... EXIT` to remove TMPFILE: the caller consumes + # IDEA_BODY_FILE after this script exits, so a naive trap would delete + # the file the caller needs. On error paths the tempfile is leaked in + # $TMPDIR; the OS sweeps $TMPDIR on reboot and the cost is negligible.
TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" IDEA_BODY_FILE="$TMPFILE" diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index e6d8d0c4..22aad3da 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -86,6 +86,12 @@ INPUT_MODE="" IDEA_BODY_FILE="" SLUG="" +# Detect whether IDEA_INPUT is meant as a file path. The `-f` test further +# below is the primary gate; this heuristic only matters when that test fails +# and we must decide whether to emit INPUT_NOT_FOUND (user meant a path) or +# treat the text as inline. Limitation: a path that contains spaces AND does +# not exist falls through to inline mode silently, because the space rule is +# how we avoid misclassifying inline ideas that happen to contain "/". looks_like_path=false if [[ "$IDEA_INPUT" == *.md ]]; then looks_like_path=true @@ -105,7 +111,7 @@ if [[ -f "$IDEA_INPUT" ]]; then exit 1 fi INPUT_MODE="file" - IDEA_BODY_FILE="$(realpath "$IDEA_INPUT")" + IDEA_BODY_FILE="$(realpath "$IDEA_INPUT" 2>/dev/null || echo "$IDEA_INPUT")" base="$(basename "$IDEA_INPUT")" SLUG="${base%.md}" elif [[ "$looks_like_path" == true ]]; then @@ -114,6 +120,10 @@ elif [[ "$looks_like_path" == true ]]; then exit 2 else INPUT_MODE="inline" + # Deliberately no `trap ... EXIT` to remove TMPFILE: the caller consumes + # IDEA_BODY_FILE after this script exits, so a naive trap would delete + # the file the caller needs. On error paths the tempfile is leaked in + # $TMPDIR; the OS sweeps $TMPDIR on reboot and the cost is negligible.
TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" IDEA_BODY_FILE="$TMPFILE" From a49ed09ba8e97b57a3a3da44b6018c61dfcde40d Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 13:06:25 +0800 Subject: [PATCH 88/97] feat(gen-idea): add slash-command spec for directed-swarm drafting Five-phase command: parse, validate via scripts/validate-gen-idea-io.sh, generate N orthogonal directions grounded in repo context, fan out N parallel Explore subagents in a single Task-tool message, synthesize one primary plus N-1 alternatives into the new template, write the draft. No Codex, no tests, no config-loader integration in this pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- commands/gen-idea.md | 203 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 commands/gen-idea.md diff --git a/commands/gen-idea.md b/commands/gen-idea.md new file mode 100644 index 00000000..b33b0fe3 --- /dev/null +++ b/commands/gen-idea.md @@ -0,0 +1,203 @@ +--- +description: "Generate a repo-grounded idea draft via directed-swarm exploration" +argument-hint: "<idea-text-or-path> [--n <int>] [--output <path>]" +allowed-tools: + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh:*)" + - "Read" + - "Glob" + - "Grep" + - "Task" + - "Write" +--- + +# Generate Idea Draft from Loose Input + +Read and execute below with ultrathink. + +## Hard Constraint: Draft-Only Output + +This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4. All exploration subagents run read-only. + +This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. 
It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop one each, and the lead synthesizes a draft with one primary direction plus N-1 alternatives. Each direction carries objective evidence from the repo. + +## Workflow Overview + +> Sequential Execution Constraint: All phases MUST execute strictly in order. Each phase fully completes before the next. + +1. Parse Input (Phase 0) +2. IO Validation (Phase 1) +3. Direction Generation (Phase 2) +4. Parallel Exploration (Phase 3) +5. Synthesis and Write (Phase 4) + +--- + +## Phase 0: Parse Input + +Extract from `$ARGUMENTS`: +- First positional: inline idea text or path to a `.md` file (required). +- `--n <int>`: number of directions. Default 6. +- `--output <path>`: target draft path. Default resolved by the validation script. + +Do not interpret or rewrite the idea text here. Pass `$ARGUMENTS` through to Phase 1 unchanged. + +--- + +## Phase 1: IO Validation + +Run: +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh" $ARGUMENTS +``` + +Handle exit codes: +- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. +- `1`: Report "Missing or empty idea input" and stop. +- `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. +- `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. +- `4`: Report "Output file already exists — choose a different path" and stop. +- `5`: Report "No write permission to output directory" and stop. +- `6`: Report "Invalid arguments" with the stdout usage text and stop. +- `7`: Report "Template file missing — plugin configuration error" and stop. + +Read the full contents of `IDEA_BODY_FILE` using the `Read` tool. Preserve byte-identical content in memory for later phases.
+ +--- + +## Phase 2: Direction Generation + +Generate exactly `N` orthogonal directions for exploring the idea. + +### Context to Gather + +Before generating directions, read: +- `README.md` at the project root. +- `CLAUDE.md` at the project root (if it exists). +- `.claude/CLAUDE.md` (if it exists). +- Top-level directory listing via `Glob` with pattern `*` (one level, no recursion). + +This context grounds the directions in the actual repo rather than generic brainstorming. + +### Generation Rules + +Produce exactly `N` direction entries. Each entry has: +- `name`: a 2-5 word short label. +- `rationale`: a single sentence explaining why this angle is distinct from the other directions. + +Hard constraint: **orthogonality**. Two near-duplicate directions defeat the directed-diversity premise. Before returning: +- If two directions feel like dupes, replace one with a genuinely different angle. +- If a direction collapses to "just do X better" with no angle distinction, replace it. +- Do not emit directions that merely restate the idea in different words. + +### Retry and Degradation + +- If the first pass returns fewer than `N` entries, regenerate once with an explicit "you MUST produce `N` orthogonal directions" instruction. +- If the second pass still returns fewer than `N` but at least 2, proceed with the reduced count and emit a warning to the user: `Warning: direction generation returned <count> of <N> requested directions; proceeding with reduced count.` +- If fewer than 2 directions are produced, stop with error: `direction generation degraded; retry.` + +Store the final direction list as `DIRECTIONS` (ordered; index 0..len-1). + +--- + +## Phase 3: Parallel Exploration + +Dispatch all directions in a **single Task-tool message** containing one Task invocation per direction. This is the W2S parallel-swarm step. + +### Subagent Invocation + +For each direction in `DIRECTIONS`, launch one `Explore` subagent. Each invocation prompt MUST include: + +1. 
A verbatim copy of the idea body loaded from `IDEA_BODY_FILE` in Phase 1. +2. The assigned direction (name + rationale). +3. The following instruction block (reproduce verbatim in the subagent prompt): + +> Explore this direction within the current repo. Gather OBJECTIVE EVIDENCE: +> - Specific repo paths with existing patterns worth extending. +> - Prior art or precedent in the codebase or adjacent tooling. +> - Measurable considerations (approximate complexity, LOC surface, performance implications) where discoverable from reading the code. +> +> Read-only. Do not write any files. +> +> If no concrete evidence exists for this direction, report the literal string `exploratory, no concrete precedent` once in OBJECTIVE_EVIDENCE and stop exploring further. Fabrication of references is forbidden. +> +> Return a structured proposal with exactly these fields: +> - `APPROACH_SUMMARY`: concrete design description (what to build, core mechanism, affected components). +> - `OBJECTIVE_EVIDENCE`: bullet list of repo paths, prior art, or the `exploratory, no concrete precedent` sentinel. +> - `KNOWN_RISKS`: short bullet list. +> - `CONFIDENCE`: one of `high`, `medium`, `low`. + +### Collection and Degradation + +Collect all subagent responses. For each response: +- Parse the four required fields. If a field is missing, mark that proposal as degraded and drop it. +- If fewer than 2 proposals survive, stop with error: `exploration phase degraded; retry.` +- Otherwise continue with the surviving proposals. + +Associate each surviving proposal with its originating direction index, preserving the original ordering for Alt-N numbering in Phase 4. + +--- + +## Phase 4: Synthesis and Write + +### Step 4.1: Pick the Primary Direction + +Review all surviving proposals. Choose the strongest as the primary based on: +1. Evidence density — more concrete repo references outranks fewer. +2. Fit with existing repo patterns — extending patterns outranks introducing unfamiliar paradigms. +3. 
Implementation surface area — prefer smaller surface where quality is otherwise comparable. +4. Declared `CONFIDENCE` — `high` > `medium` > `low` as tiebreaker. + +Record the chosen direction as `PRIMARY`; the remaining directions become the Alt-1..Alt-(N-1) list in their original ordering. + +### Step 4.2: Infer Title + +Generate a 4-10 word Title Case title that captures the primary direction, not the original input phrasing verbatim. Example: idea `add undo/redo` with primary direction `command-pattern history` yields title `Command-Pattern Undo Stack For The Editor`. + +### Step 4.3: Populate the Template + +Read the template file located at `TEMPLATE_FILE` (from Phase 1 stdout). + +Produce the finalized draft content in memory by replacing placeholders: +- `<TITLE>` — the inferred title. +- `<ORIGINAL_IDEA>` — byte-identical contents of `IDEA_BODY_FILE`. Preserve line breaks, trailing newline, and all formatting. Do NOT paraphrase or re-indent. +- `<PRIMARY_NAME>` — primary direction's short name. +- `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). +- `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. +- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. +- `<PRIMARY_KNOWN_RISKS>` — primary proposal's `KNOWN_RISKS`, rendered as a bullet list. +- `<ALTERNATIVES>` — for each remaining direction at index `i` (1-based in user-facing output), emit: + + ```markdown + ### Alt-<i>: <name> + - Gist: <one-paragraph summary derived from APPROACH_SUMMARY> + - Objective Evidence: + - <bullet from OBJECTIVE_EVIDENCE> + - ... + - Why not primary: <one sentence stating the tradeoff vs PRIMARY> + ``` + + Separate consecutive Alt entries with a single blank line. + +- `<SYNTHESIS_NOTES>` — one paragraph describing which elements from the alternatives could fold into the primary if the user chose a different direction. This is the lead's own synthesis note, not a subagent output. 
+ +### Step 4.4: Write the Draft File + +Write the finalized content to `OUTPUT_FILE` using the `Write` tool. Single write; no progressive edits. + +### Step 4.5: Report + +Report to the user: +- Path written (`OUTPUT_FILE`). +- Primary direction name. +- Requested `N` and the actual direction count (note if reduced due to degradation). +- Next-step hint: `To turn this draft into a plan, run: /humanize:gen-plan --input <OUTPUT_FILE> --output <plan-path>`. + +--- + +## Error Handling + +- Phase 1 validation errors stop the command with a clear message. No partial output. +- Phase 2 degradation follows the retry-once + ≥2 minimum rule stated above. +- Phase 3 degradation follows the drop-and-continue + ≥2 minimum rule stated above. +- Never fabricate repo references or prior art. The `exploratory, no concrete precedent` sentinel from subagents is preserved verbatim in the draft. +- If any phase stops with an error, do not write a partial `OUTPUT_FILE`. From 2269e0a0fdf8e784d00df96cf647ee35235bcf3c Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 13:12:13 +0800 Subject: [PATCH 89/97] fix(gen-idea): clarify command spec ambiguities surfaced in review Five documentation patches to commands/gen-idea.md (and the mirrored copy in the plan doc): - State that degraded runs renumber surviving alternatives Alt-1..Alt-K sequentially rather than preserving source-index gaps. - Specify the rendering of the `exploratory, no concrete precedent` sentinel as a single bullet when it is the sole evidence. - Note that the Hard Constraint permits the validation script's prerequisite `.humanize/ideas/` directory creation. - Bold the Sequential Execution Constraint label for style parity with gen-plan.md. - Pin the Phase 2 context paths (README.md, CLAUDE.md, .claude/CLAUDE.md) to the project root via `git rev-parse --show-toplevel`. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- commands/gen-idea.md | 14 +++++++------- docs/superpowers/plans/2026-04-20-gen-idea.md | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/commands/gen-idea.md b/commands/gen-idea.md index b33b0fe3..b5127e2f 100644 --- a/commands/gen-idea.md +++ b/commands/gen-idea.md @@ -16,13 +16,13 @@ Read and execute below with ultrathink. ## Hard Constraint: Draft-Only Output -This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4. All exploration subagents run read-only. +This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4; prerequisite directory creation for the default `.humanize/ideas/` path by the validation script is permitted as part of that write. All exploration subagents run read-only. This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop each, the lead synthesizes a draft with one primary direction plus N-1 alternatives. Each direction carries objective evidence from the repo. ## Workflow Overview -> Sequential Execution Constraint: All phases MUST execute strictly in order. Each phase fully completes before the next. +> **Sequential Execution Constraint**: All phases MUST execute strictly in order. Each phase fully completes before the next. 1. Parse Input 2. IO Validation @@ -70,7 +70,7 @@ Generate exactly `N` orthogonal directions for exploring the idea. 
### Context to Gather -Before generating directions, read: +Before generating directions, read (paths relative to the project root, which is `$(git rev-parse --show-toplevel)`): - `README.md` at the project root. - `CLAUDE.md` at the project root (if it exists). - `.claude/CLAUDE.md` (if it exists). @@ -133,7 +133,7 @@ Collect all subagent responses. For each response: - If fewer than 2 proposals survive, stop with error: `exploration phase degraded; retry.` - Otherwise continue with the surviving proposals. -Associate each surviving proposal with its originating direction index, preserving the original ordering for Alt-N numbering in Phase 4. +Associate each surviving proposal with its originating direction (so Phase 4 can label it with the original direction name). When numbering alternatives in Phase 4 after any drops, renumber survivors sequentially as Alt-1..Alt-K (where K is the count of surviving non-primary directions). Do not preserve gaps from dropped proposals. --- @@ -147,7 +147,7 @@ Review all surviving proposals. Choose the strongest as the primary based on: 3. Implementation surface area — prefer smaller surface where quality is otherwise comparable. 4. Declared `CONFIDENCE` — `high` > `medium` > `low` as tiebreaker. -Record the chosen direction as `PRIMARY`; the remaining directions become the Alt-1..Alt-(N-1) list in their original ordering. +Record the chosen direction as `PRIMARY`; the remaining surviving directions become the Alt-1..Alt-K list (where K is the number of non-primary survivors, K ≤ N-1), numbered sequentially in their original direction order with no gaps for any dropped proposals. ### Step 4.2: Infer Title @@ -163,9 +163,9 @@ Produce the finalized draft content in memory by replacing placeholders: - `<PRIMARY_NAME>` — primary direction's short name. - `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). - `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. 
-- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. +- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. If the subagent returned only the literal sentinel `exploratory, no concrete precedent`, render it as a single bullet: `- exploratory, no concrete precedent`. - `<PRIMARY_KNOWN_RISKS>` — primary proposal's `KNOWN_RISKS`, rendered as a bullet list. -- `<ALTERNATIVES>` — for each remaining direction at index `i` (1-based in user-facing output), emit: +- `<ALTERNATIVES>` — for each non-primary survivor at its Alt index `i` (1-based, sequential per Step 4.1), emit: ```markdown ### Alt-<i>: <name> diff --git a/docs/superpowers/plans/2026-04-20-gen-idea.md b/docs/superpowers/plans/2026-04-20-gen-idea.md index 70af9d5a..a7ba31d5 100644 --- a/docs/superpowers/plans/2026-04-20-gen-idea.md +++ b/docs/superpowers/plans/2026-04-20-gen-idea.md @@ -462,13 +462,13 @@ Read and execute below with ultrathink. ## Hard Constraint: Draft-Only Output -This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4. All exploration subagents run read-only. +This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4; prerequisite directory creation for the default `.humanize/ideas/` path by the validation script is permitted as part of that write. All exploration subagents run read-only. This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop each, the lead synthesizes a draft with one primary direction plus N-1 alternatives. 
Each direction carries objective evidence from the repo. ## Workflow Overview -> Sequential Execution Constraint: All phases MUST execute strictly in order. Each phase fully completes before the next. +> **Sequential Execution Constraint**: All phases MUST execute strictly in order. Each phase fully completes before the next. 1. Parse Input 2. IO Validation @@ -516,7 +516,7 @@ Generate exactly `N` orthogonal directions for exploring the idea. ### Context to Gather -Before generating directions, read: +Before generating directions, read (paths relative to the project root, which is `$(git rev-parse --show-toplevel)`): - `README.md` at the project root. - `CLAUDE.md` at the project root (if it exists). - `.claude/CLAUDE.md` (if it exists). @@ -579,7 +579,7 @@ Collect all subagent responses. For each response: - If fewer than 2 proposals survive, stop with error: `exploration phase degraded; retry.` - Otherwise continue with the surviving proposals. -Associate each surviving proposal with its originating direction index, preserving the original ordering for Alt-N numbering in Phase 4. +Associate each surviving proposal with its originating direction (so Phase 4 can label it with the original direction name). When numbering alternatives in Phase 4 after any drops, renumber survivors sequentially as Alt-1..Alt-K (where K is the count of surviving non-primary directions). Do not preserve gaps from dropped proposals. --- @@ -593,7 +593,7 @@ Review all surviving proposals. Choose the strongest as the primary based on: 3. Implementation surface area — prefer smaller surface where quality is otherwise comparable. 4. Declared `CONFIDENCE` — `high` > `medium` > `low` as tiebreaker. -Record the chosen direction as `PRIMARY`; the remaining directions become the Alt-1..Alt-(N-1) list in their original ordering. 
+Record the chosen direction as `PRIMARY`; the remaining surviving directions become the Alt-1..Alt-K list (where K is the number of non-primary survivors, K ≤ N-1), numbered sequentially in their original direction order with no gaps for any dropped proposals. ### Step 4.2: Infer Title @@ -609,9 +609,9 @@ Produce the finalized draft content in memory by replacing placeholders: - `<PRIMARY_NAME>` — primary direction's short name. - `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). - `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. -- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. +- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. If the subagent returned only the literal sentinel `exploratory, no concrete precedent`, render it as a single bullet: `- exploratory, no concrete precedent`. - `<PRIMARY_KNOWN_RISKS>` — primary proposal's `KNOWN_RISKS`, rendered as a bullet list. -- `<ALTERNATIVES>` — for each remaining direction at index `i` (1-based in user-facing output), emit: +- `<ALTERNATIVES>` — for each non-primary survivor at its Alt index `i` (1-based, sequential per Step 4.1), emit: ```markdown ### Alt-<i>: <name> From c6d8d19722b9f4efc8b56744f30aa699fde96859 Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 13:15:32 +0800 Subject: [PATCH 90/97] feat(gen-idea): document command in README, bump to 1.16.1 Add a Quick Start entry for /humanize:gen-idea above the existing gen-plan step and sync version across plugin.json, marketplace.json, and the README header. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 18 ++++++++++++------ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2e833ddc..e72bd0aa 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.16.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index fd77b933..1ed2e046 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.16.1", "author": { "name": "PolyArch" }, diff --git a/README.md b/README.md index da6d8305..7d517d4b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.16.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. @@ -41,27 +41,33 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ ## Quick Start -1. **Generate a plan** from your draft: +1. **Generate an idea draft** from a loose thought (optional — skip if you already have a draft): + ```bash + /humanize:gen-idea "add undo/redo to the editor" + ``` + Output goes to `.humanize/ideas/<slug>-<timestamp>.md` by default. 
Pass a `.md` path to expand existing rough notes. `--n` controls how many parallel directions explore the idea (default 6). + +2. **Generate a plan** from your draft: ```bash /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `<cmt>` ... `</cmt>`, or `<comment>` ... `</comment>`): +3. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `<cmt>` ... `</cmt>`, or `<comment>` ... `</comment>`): ```bash /humanize:refine-plan --input docs/plan.md ``` -3. **Run the loop**: +4. **Run the loop**: ```bash /humanize:start-rlcr-loop docs/plan.md ``` -4. **Consult Gemini** for deep web research (requires Gemini CLI): +5. **Consult Gemini** for deep web research (requires Gemini CLI): ```bash /humanize:ask-gemini What are the latest best practices for X? ``` -5. **Monitor progress (in another terminal, not inside Claude Code)**: +6. **Monitor progress (in another terminal, not inside Claude Code)**: ```bash source <path/to/humanize>/scripts/humanize.sh # Or just add it into your .bashrc or .zshrc humanize monitor rlcr # RLCR loop From 1adeca421e0788d6ebccfe3ef6d63db62a5dab26 Mon Sep 17 00:00:00 2001 From: shinan6 <liushinan63@gmail.com> Date: Mon, 20 Apr 2026 13:25:34 +0800 Subject: [PATCH 91/97] feat(gen-idea): emit short-idea warning and clarify SLUG usage Address final-review findings: - Implement the spec's warn-and-continue behavior for inline ideas shorter than 10 characters by emitting a `WARNING: short idea ...` line on stdout before VALIDATION_SUCCESS. The command spec documents this as a non-error informational line that should be surfaced to the user but does not block Phase 2. - Note in Phase 1 that SLUG is informational; it is already baked into OUTPUT_FILE by the script and does not need to be consumed directly by later phases. - Mirror both edits into the plan doc copies.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- commands/gen-idea.md | 4 +++- docs/superpowers/plans/2026-04-20-gen-idea.md | 7 ++++++- scripts/validate-gen-idea-io.sh | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/commands/gen-idea.md b/commands/gen-idea.md index b5127e2f..2897a818 100644 --- a/commands/gen-idea.md +++ b/commands/gen-idea.md @@ -51,7 +51,7 @@ Run: ``` Handle exit codes: -- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. +- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) - `1`: Report "Missing or empty idea input" and stop. - `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. - `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. @@ -60,6 +60,8 @@ Handle exit codes: - `6`: Report "Invalid arguments" with the stdout usage text and stop. - `7`: Report "Template file missing — plugin configuration error" and stop. +Before `VALIDATION_SUCCESS`, stdout may contain one or more lines starting with `WARNING:` (for example, `WARNING: short idea (<N> chars); proceeding` when an inline idea is under 10 characters). Surface these warnings to the user in your final report but continue Phase 2 normally. `WARNING:` lines are informational, not errors. + Read the full contents of `IDEA_BODY_FILE` using the `Read` tool. Preserve byte-identical content in memory for later phases. 
--- diff --git a/docs/superpowers/plans/2026-04-20-gen-idea.md b/docs/superpowers/plans/2026-04-20-gen-idea.md index a7ba31d5..ace48003 100644 --- a/docs/superpowers/plans/2026-04-20-gen-idea.md +++ b/docs/superpowers/plans/2026-04-20-gen-idea.md @@ -254,6 +254,9 @@ else TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" IDEA_BODY_FILE="$TMPFILE" + if (( ${#IDEA_INPUT} < 10 )); then + echo "WARNING: short idea (${#IDEA_INPUT} chars); proceeding" + fi slug_raw="$(printf '%s' "$IDEA_INPUT" | head -c 40 | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9-]+/-/g' | sed -E 's/-+/-/g' | sed -E 's/^-+//; s/-+$//')" if [[ -z "$slug_raw" ]]; then slug_raw="idea" @@ -497,7 +500,7 @@ Run: ``` Handle exit codes: -- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. +- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) - `1`: Report "Missing or empty idea input" and stop. - `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. - `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. @@ -506,6 +509,8 @@ Handle exit codes: - `6`: Report "Invalid arguments" with the stdout usage text and stop. - `7`: Report "Template file missing — plugin configuration error" and stop. +Before `VALIDATION_SUCCESS`, stdout may contain one or more lines starting with `WARNING:` (for example, `WARNING: short idea (<N> chars); proceeding` when an inline idea is under 10 characters). Surface these warnings to the user in your final report but continue Phase 2 normally. 
`WARNING:` lines are informational, not errors. + Read the full contents of `IDEA_BODY_FILE` using the `Read` tool. Preserve byte-identical content in memory for later phases. --- diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index 22aad3da..cc106f87 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -127,6 +127,9 @@ else TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" IDEA_BODY_FILE="$TMPFILE" + if (( ${#IDEA_INPUT} < 10 )); then + echo "WARNING: short idea (${#IDEA_INPUT} chars); proceeding" + fi slug_raw="$(printf '%s' "$IDEA_INPUT" | head -c 40 | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9-]+/-/g' | sed -E 's/-+/-/g' | sed -E 's/^-+//; s/-+$//')" if [[ -z "$slug_raw" ]]; then slug_raw="idea" From 695766a625485a996a44bba419a7d94e82358c46 Mon Sep 17 00:00:00 2001 From: Sihao Liu <sihao@cs.ucla.edu> Date: Mon, 20 Apr 2026 16:27:16 -0700 Subject: [PATCH 92/97] chore: revert version to 1.16.0 --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index e72bd0aa..2e833ddc 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 1ed2e046..fd77b933 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1", + "version": "1.16.0", "author": { "name": "PolyArch" }, diff --git a/README.md b/README.md index 7d517d4b..05f2fdd3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.1** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. From ad9fe531b83c8eb78378b61c8985b20e3e2a5a89 Mon Sep 17 00:00:00 2001 From: Sihao Liu <sihao@cs.ucla.edu> Date: Mon, 20 Apr 2026 16:32:55 -0700 Subject: [PATCH 93/97] remove superpowers docs from branch --- docs/superpowers/plans/2026-04-20-gen-idea.md | 921 ------------------ .../specs/2026-04-20-gen-idea-design.md | 210 ---- 2 files changed, 1131 deletions(-) delete mode 100644 docs/superpowers/plans/2026-04-20-gen-idea.md delete mode 100644 docs/superpowers/specs/2026-04-20-gen-idea-design.md diff --git a/docs/superpowers/plans/2026-04-20-gen-idea.md b/docs/superpowers/plans/2026-04-20-gen-idea.md deleted file mode 100644 index ace48003..00000000 --- a/docs/superpowers/plans/2026-04-20-gen-idea.md +++ /dev/null @@ -1,921 +0,0 @@ -# gen-idea Command Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. 
Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Ship a `/humanize:gen-idea` slash command that takes a loose idea (inline text or `.md`) and writes a repo-grounded draft suitable as `/humanize:gen-plan`'s `--input`. - -**Architecture:** Five-phase command executed by Claude inside a single `commands/gen-idea.md` spec. A bash validation script resolves I/O and slug; Phase 2 produces N orthogonal directions; Phase 3 fans out N parallel `Explore` subagents in a single Task-tool message; Phase 4 synthesizes one primary direction plus N−1 alternatives into a templated draft. - -**Tech Stack:** Claude Code plugin command markdown, bash (validation + slug generation), `Explore` subagents via the Task tool, `Write` tool for final draft. No Codex, no tests, no config-loader integration in this first pass. - -**Spec reference:** `docs/superpowers/specs/2026-04-20-gen-idea-design.md` - -**Scope rule:** The project convention (`.claude/CLAUDE.md`) requires every commit on a non-`main` branch to keep `version` in `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, and `README.md` above `main`'s version. Branch `add-gen-idea-command` currently inherits `1.16.0` from `origin/dev`, which is already above `main`'s `1.15.4`. Task 4 bumps all three files to `1.16.1` in a single commit that also ships the README Quick Start entry. - ---- - -## File Structure - -Files created or modified by this plan: - -- **Create** `prompt-template/idea/gen-idea-template.md` — static markdown skeleton Claude fills in Phase 4. Mirrors `prompt-template/plan/gen-plan-template.md` style. -- **Create** `scripts/validate-gen-idea-io.sh` — pure bash; parses args, detects inline vs file input, generates slug, resolves default output path, validates `--n` range and output writability, locates template. Modeled on `scripts/validate-gen-plan-io.sh`. -- **Create** `commands/gen-idea.md` — the slash-command spec.
Frontmatter declares `allowed-tools`; body drives Claude through phases 0–4. Modeled on a lean subset of `commands/gen-plan.md`. -- **Modify** `README.md` — add one Quick Start bullet above the `gen-plan` entry; bump `Current Version` to `1.16.1`. -- **Modify** `.claude-plugin/plugin.json` — bump `version` to `1.16.1`. -- **Modify** `.claude-plugin/marketplace.json` — bump `plugins[0].version` to `1.16.1`. - -No new subagent type, no test harness, no hook changes. - ---- - -## Task 1: Create the Draft Template - -**Files:** -- Create: `prompt-template/idea/gen-idea-template.md` - -- [ ] **Step 1.1: Create the template directory and file** - -Run: -```bash -mkdir -p prompt-template/idea -``` - -Write `prompt-template/idea/gen-idea-template.md`: - -```markdown -# <TITLE> - -## Original Idea - -<ORIGINAL_IDEA> - -## Primary Direction: <PRIMARY_NAME> - -### Rationale - -<PRIMARY_RATIONALE> - -### Approach Summary - -<PRIMARY_APPROACH_SUMMARY> - -### Objective Evidence - -<PRIMARY_OBJECTIVE_EVIDENCE> - -### Known Risks - -<PRIMARY_KNOWN_RISKS> - -## Alternative Directions Considered - -<ALTERNATIVES> - -## Synthesis Notes - -<SYNTHESIS_NOTES> -``` - -Placeholders Claude replaces in Phase 4: -- `<TITLE>` — inferred 4–10 word Title Case title. -- `<ORIGINAL_IDEA>` — verbatim copy of user's inline text or `.md` contents. Byte-identical. -- `<PRIMARY_NAME>` — chosen direction's short name. -- `<PRIMARY_RATIONALE>` / `<PRIMARY_APPROACH_SUMMARY>` / `<PRIMARY_OBJECTIVE_EVIDENCE>` (bulleted) / `<PRIMARY_KNOWN_RISKS>` — populated from the chosen proposal. -- `<ALTERNATIVES>` — block containing Alt-1..Alt-(N−1) subsections, each with `### Alt-N: <name>`, `- Gist:`, `- Objective Evidence:` (bullets), `- Why not primary:`. -- `<SYNTHESIS_NOTES>` — one paragraph on which alt elements could fold into the primary. 
- -- [ ] **Step 1.2: Verify the file is written correctly** - -Run: -```bash -cat prompt-template/idea/gen-idea-template.md -``` - -Expected: the exact content above prints to stdout, no errors. - -- [ ] **Step 1.3: Commit** - -Run: -```bash -git add prompt-template/idea/gen-idea-template.md -git commit -m "$(cat <<'EOF' -feat(gen-idea): add draft template for directed-swarm output - -Skeleton populated by the command's Phase 4 synthesis step. Placeholders -<TITLE>, <ORIGINAL_IDEA>, <PRIMARY_*>, <ALTERNATIVES>, <SYNTHESIS_NOTES> -are filled deterministically by the command body. - -Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> -EOF -)" -``` - -Expected: commit succeeds; `git log -1 --stat` shows the new file added. - ---- - -## Task 2: Create the IO Validation Script - -**Files:** -- Create: `scripts/validate-gen-idea-io.sh` - -- [ ] **Step 2.1: Write the script** - -Write `scripts/validate-gen-idea-io.sh`: - -```bash -#!/usr/bin/env bash -# validate-gen-idea-io.sh -# Validates input, slug, and output paths for the gen-idea command. 
-# Exit codes: -# 0 - Success -# 1 - Missing idea input or empty input file -# 2 - Input looks like a path but is unreadable, not .md, or does not exist -# 3 - Output parent directory does not exist (user-supplied path only) -# 4 - Output file already exists -# 5 - No write permission to output directory -# 6 - Invalid arguments (including --n out of range) -# 7 - Template file not found (plugin configuration error) - -set -e - -usage() { - echo "Usage: $0 <idea-text-or-path> [--n <int>] [--output <path>]" - echo "" - echo "Arguments:" - echo " <idea-text-or-path> Inline idea text OR path to an existing .md file (required)" - echo " --n Number of directions (default: 6; range: 2-10)" - echo " --output Output draft path (default: .humanize/ideas/<slug>-<timestamp>.md)" - echo " -h, --help Show this help message" - exit 6 -} - -IDEA_INPUT="" -N=6 -OUTPUT_FILE="" - -while [[ $# -gt 0 ]]; do - case "$1" in - --n) - if [[ $# -lt 2 || "$2" == --* ]]; then - echo "ERROR: --n requires a value" - usage - fi - N="$2" - shift 2 - ;; - --output) - if [[ $# -lt 2 || "$2" == --* ]]; then - echo "ERROR: --output requires a value" - usage - fi - OUTPUT_FILE="$2" - shift 2 - ;; - -h|--help) - usage - ;; - --*) - echo "ERROR: Unknown option: $1" - usage - ;; - *) - if [[ -z "$IDEA_INPUT" ]]; then - IDEA_INPUT="$1" - shift - else - echo "ERROR: Unexpected positional argument: $1" - usage - fi - ;; - esac -done - -if [[ -z "$IDEA_INPUT" ]]; then - echo "VALIDATION_ERROR: MISSING_IDEA" - echo "No idea provided. Pass inline text or a .md file path as the first argument." - exit 1 -fi - -if ! [[ "$N" =~ ^[0-9]+$ ]]; then - echo "VALIDATION_ERROR: INVALID_N" - echo "--n must be a non-negative integer; got: $N" - exit 6 -fi -if (( N < 2 || N > 10 )); then - echo "VALIDATION_ERROR: N_OUT_OF_RANGE" - echo "--n must be between 2 and 10 inclusive; got: $N" - exit 6 -fi - -INPUT_MODE="" -IDEA_BODY_FILE="" -SLUG="" - -# Detect whether IDEA_INPUT is meant as a file path. 
The `-f` test on line -# below is the primary gate; this heuristic only matters when that test fails -# and we must decide whether to emit INPUT_NOT_FOUND (user meant a path) or -# treat the text as inline. Limitation: a path that contains spaces AND does -# not exist falls through to inline mode silently, because the space rule is -# how we avoid misclassifying inline ideas that happen to contain "/". -looks_like_path=false -if [[ "$IDEA_INPUT" == *.md ]]; then - looks_like_path=true -elif [[ "$IDEA_INPUT" == */* && "$IDEA_INPUT" != *" "* ]]; then - looks_like_path=true -fi - -if [[ -f "$IDEA_INPUT" ]]; then - if [[ "$IDEA_INPUT" != *.md ]]; then - echo "VALIDATION_ERROR: INPUT_NOT_MD" - echo "File input must have .md extension; got: $IDEA_INPUT" - exit 2 - fi - if [[ ! -s "$IDEA_INPUT" ]]; then - echo "VALIDATION_ERROR: INPUT_EMPTY" - echo "Input file is empty: $IDEA_INPUT" - exit 1 - fi - INPUT_MODE="file" - IDEA_BODY_FILE="$(realpath "$IDEA_INPUT" 2>/dev/null || echo "$IDEA_INPUT")" - base="$(basename "$IDEA_INPUT")" - SLUG="${base%.md}" -elif [[ "$looks_like_path" == true ]]; then - echo "VALIDATION_ERROR: INPUT_NOT_FOUND" - echo "Looks like a file path but does not exist: $IDEA_INPUT" - exit 2 -else - INPUT_MODE="inline" - # Deliberately no `trap ... EXIT` to remove TMPFILE: the caller consumes - # IDEA_BODY_FILE after this script exits, so a naive trap would delete - # the file the caller needs. On error paths the tempfile is leaked in - # $TMPDIR; the OS sweeps $TMPDIR on reboot and the cost is negligible. 
- TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" - printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" - IDEA_BODY_FILE="$TMPFILE" - if (( ${#IDEA_INPUT} < 10 )); then - echo "WARNING: short idea (${#IDEA_INPUT} chars); proceeding" - fi - slug_raw="$(printf '%s' "$IDEA_INPUT" | head -c 40 | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9-]+/-/g' | sed -E 's/-+/-/g' | sed -E 's/^-+//; s/-+$//')" - if [[ -z "$slug_raw" ]]; then - slug_raw="idea" - fi - SLUG="$slug_raw" -fi - -PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" - -DEFAULT_OUTPUT=false -if [[ -z "$OUTPUT_FILE" ]]; then - TIMESTAMP="$(date +%Y%m%d-%H%M%S)" - OUTPUT_FILE="$PROJECT_ROOT/.humanize/ideas/${SLUG}-${TIMESTAMP}.md" - DEFAULT_OUTPUT=true -fi - -OUTPUT_FILE="$(realpath -m "$OUTPUT_FILE" 2>/dev/null || echo "$OUTPUT_FILE")" -OUTPUT_DIR="$(dirname "$OUTPUT_FILE")" - -if [[ "$DEFAULT_OUTPUT" == true ]]; then - mkdir -p "$OUTPUT_DIR" 2>/dev/null || true -fi - -if [[ ! -d "$OUTPUT_DIR" ]]; then - echo "VALIDATION_ERROR: OUTPUT_DIR_NOT_FOUND" - echo "Output directory does not exist: $OUTPUT_DIR" - exit 3 -fi - -if [[ -e "$OUTPUT_FILE" ]]; then - echo "VALIDATION_ERROR: OUTPUT_EXISTS" - echo "Output already exists: $OUTPUT_FILE" - exit 4 -fi - -if [[ ! -w "$OUTPUT_DIR" ]]; then - echo "VALIDATION_ERROR: NO_WRITE_PERMISSION" - echo "No write permission: $OUTPUT_DIR" - exit 5 -fi - -if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then - TEMPLATE_FILE="$CLAUDE_PLUGIN_ROOT/prompt-template/idea/gen-idea-template.md" -else - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" - TEMPLATE_FILE="$SCRIPT_DIR/../prompt-template/idea/gen-idea-template.md" -fi - -if [[ ! 
-f "$TEMPLATE_FILE" ]]; then - echo "VALIDATION_ERROR: TEMPLATE_NOT_FOUND" - echo "Template file missing: $TEMPLATE_FILE" - exit 7 -fi - -echo "VALIDATION_SUCCESS" -echo "INPUT_MODE: $INPUT_MODE" -echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" -echo "OUTPUT_FILE: $OUTPUT_FILE" -echo "SLUG: $SLUG" -echo "TEMPLATE_FILE: $TEMPLATE_FILE" -echo "N: $N" -exit 0 -``` - -- [ ] **Step 2.2: Make the script executable** - -Run: -```bash -chmod +x scripts/validate-gen-idea-io.sh -ls -l scripts/validate-gen-idea-io.sh -``` - -Expected: permissions include `x` (e.g. `-rwxr-xr-x`). - -- [ ] **Step 2.3: Smoke-test happy path (inline input)** - -Run: -```bash -scripts/validate-gen-idea-io.sh "add undo/redo to the editor" --n 4 -``` - -Expected stdout contains: -``` -VALIDATION_SUCCESS -INPUT_MODE: inline -IDEA_BODY_FILE: /tmp/... (or $TMPDIR/...) -OUTPUT_FILE: .../.humanize/ideas/add-undo-redo-to-the-editor-<timestamp>.md -SLUG: add-undo-redo-to-the-editor -TEMPLATE_FILE: .../prompt-template/idea/gen-idea-template.md -N: 4 -``` -Exit code: `0`. - -If `.humanize/ideas/` was created under project root, remove it with `rmdir .humanize/ideas .humanize 2>/dev/null || true` to keep the tree clean between iterations. - -- [ ] **Step 2.4: Smoke-test happy path (file input)** - -Run: -```bash -mkdir -p /tmp/gen-idea-test -printf 'add hotkey support\n\nwith modifier chords\n' > /tmp/gen-idea-test/notes.md -scripts/validate-gen-idea-io.sh /tmp/gen-idea-test/notes.md -``` - -Expected: `INPUT_MODE: file`, `SLUG: notes`, `IDEA_BODY_FILE` equals the realpath of `/tmp/gen-idea-test/notes.md`, exit `0`. - -- [ ] **Step 2.5: Smoke-test error paths** - -Each of these must produce the noted exit code: - -```bash -# Missing idea -scripts/validate-gen-idea-io.sh ; echo "exit=$?" -# Expect: exit=1, MISSING_IDEA - -# --n out of range -scripts/validate-gen-idea-io.sh "x" --n 1 ; echo "exit=$?" -# Expect: exit=6, N_OUT_OF_RANGE - -scripts/validate-gen-idea-io.sh "x" --n 99 ; echo "exit=$?" 
-# Expect: exit=6, N_OUT_OF_RANGE - -# --n non-integer -scripts/validate-gen-idea-io.sh "x" --n abc ; echo "exit=$?" -# Expect: exit=6, INVALID_N - -# Path that looks like file but missing -scripts/validate-gen-idea-io.sh path/that/does/not/exist.md ; echo "exit=$?" -# Expect: exit=2, INPUT_NOT_FOUND - -# Non-.md file -touch /tmp/gen-idea-test/notes.txt -scripts/validate-gen-idea-io.sh /tmp/gen-idea-test/notes.txt ; echo "exit=$?" -# Expect: exit=2, INPUT_NOT_MD - -# Empty .md -printf '' > /tmp/gen-idea-test/empty.md -scripts/validate-gen-idea-io.sh /tmp/gen-idea-test/empty.md ; echo "exit=$?" -# Expect: exit=1, INPUT_EMPTY - -# Output already exists -touch /tmp/gen-idea-test/out.md -scripts/validate-gen-idea-io.sh "x" --output /tmp/gen-idea-test/out.md ; echo "exit=$?" -# Expect: exit=4, OUTPUT_EXISTS - -# Output parent dir missing (user-supplied) -scripts/validate-gen-idea-io.sh "x" --output /tmp/gen-idea-no-such-dir/out.md ; echo "exit=$?" -# Expect: exit=3, OUTPUT_DIR_NOT_FOUND -``` - -Cleanup: -```bash -rm -rf /tmp/gen-idea-test -rm -rf "$(pwd)/.humanize" 2>/dev/null || true -``` - -If any exit code differs, edit the script to match the expected codes and re-run the failing case before moving on. - -- [ ] **Step 2.6: Commit** - -Run: -```bash -git add scripts/validate-gen-idea-io.sh -git commit -m "$(cat <<'EOF' -feat(gen-idea): add IO validation script with slug resolution - -Detects inline vs .md file input, writes inline text to a tempfile so -downstream phases always consume a file, derives a URL-safe slug, and -resolves the default output path under .humanize/ideas/. Exit codes -parallel validate-gen-plan-io.sh. - -Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> -EOF -)" -``` - -Expected: commit succeeds. 
- ---- - -## Task 3: Create the Command Spec - -**Files:** -- Create: `commands/gen-idea.md` - -- [ ] **Step 3.1: Write the command spec** - -Write `commands/gen-idea.md`: - -````markdown ---- -description: "Generate a repo-grounded idea draft via directed-swarm exploration" -argument-hint: "<idea-text-or-path> [--n <int>] [--output <path>]" -allowed-tools: - - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh:*)" - - "Read" - - "Glob" - - "Grep" - - "Task" - - "Write" ---- - -# Generate Idea Draft from Loose Input - -Read and execute below with ultrathink. - -## Hard Constraint: Draft-Only Output - -This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4; prerequisite directory creation for the default `.humanize/ideas/` path by the validation script is permitted as part of that write. All exploration subagents run read-only. - -This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop each, the lead synthesizes a draft with one primary direction plus N-1 alternatives. Each direction carries objective evidence from the repo. - -## Workflow Overview - -> **Sequential Execution Constraint**: All phases MUST execute strictly in order. Each phase fully completes before the next. - -1. Parse Input -2. IO Validation -3. Direction Generation -4. Parallel Exploration -5. Synthesis and Write - ---- - -## Phase 0: Parse Input - -Extract from `$ARGUMENTS`: -- First positional: inline idea text or path to a `.md` file (required). -- `--n <int>`: number of directions. Default 6. -- `--output <path>`: target draft path. Default resolved by the validation script. - -Do not interpret or rewrite the idea text here. Pass `$ARGUMENTS` through to Phase 1 unchanged. 
- ---- - -## Phase 1: IO Validation - -Run: -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh" $ARGUMENTS -``` - -Handle exit codes: -- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) -- `1`: Report "Missing or empty idea input" and stop. -- `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. -- `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. -- `4`: Report "Output file already exists — choose a different path" and stop. -- `5`: Report "No write permission to output directory" and stop. -- `6`: Report "Invalid arguments" with the stdout usage text and stop. -- `7`: Report "Template file missing — plugin configuration error" and stop. - -Before `VALIDATION_SUCCESS`, stdout may contain one or more lines starting with `WARNING:` (for example, `WARNING: short idea (<N> chars); proceeding` when an inline idea is under 10 characters). Surface these warnings to the user in your final report but continue Phase 2 normally. `WARNING:` lines are informational, not errors. - -Read the full contents of `IDEA_BODY_FILE` using the `Read` tool. Preserve byte-identical content in memory for later phases. - ---- - -## Phase 2: Direction Generation - -Generate exactly `N` orthogonal directions for exploring the idea. - -### Context to Gather - -Before generating directions, read (paths relative to the project root, which is `$(git rev-parse --show-toplevel)`): -- `README.md` at the project root. -- `CLAUDE.md` at the project root (if it exists). -- `.claude/CLAUDE.md` (if it exists). -- Top-level directory listing via `Glob` with pattern `*` (one level, no recursion). 
- -This context grounds the directions in the actual repo rather than generic brainstorming. - -### Generation Rules - -Produce exactly `N` direction entries. Each entry has: -- `name`: a 2-5 word short label. -- `rationale`: a single sentence explaining why this angle is distinct from the other directions. - -Hard constraint: **orthogonality**. Two near-duplicate directions defeat the directed-diversity premise. Before returning: -- If two directions feel like dupes, replace one with a genuinely different angle. -- If a direction collapses to "just do X better" with no angle distinction, replace it. -- Do not emit directions that merely restate the idea in different words. - -### Retry and Degradation - -- If the first pass returns fewer than `N` entries, regenerate once with an explicit "you MUST produce `N` orthogonal directions" instruction. -- If the second pass still returns fewer than `N` but at least 2, proceed with the reduced count and emit a warning to the user: `Warning: direction generation returned <count> of <N> requested directions; proceeding with reduced count.` -- If fewer than 2 directions are produced, stop with error: `direction generation degraded; retry.` - -Store the final direction list as `DIRECTIONS` (ordered; index 0..len-1). - ---- - -## Phase 3: Parallel Exploration - -Dispatch all directions in a **single Task-tool message** containing one Task invocation per direction. This is the W2S parallel-swarm step. - -### Subagent Invocation - -For each direction in `DIRECTIONS`, launch one `Explore` subagent. Each invocation prompt MUST include: - -1. A verbatim copy of the idea body loaded from `IDEA_BODY_FILE` in Phase 1. -2. The assigned direction (name + rationale). -3. The following instruction block (reproduce verbatim in the subagent prompt): - -> Explore this direction within the current repo. Gather OBJECTIVE EVIDENCE: -> - Specific repo paths with existing patterns worth extending. 
-> - Prior art or precedent in the codebase or adjacent tooling. -> - Measurable considerations (approximate complexity, LOC surface, performance implications) where discoverable from reading the code. -> -> Read-only. Do not write any files. -> -> If no concrete evidence exists for this direction, report the literal string `exploratory, no concrete precedent` once in OBJECTIVE_EVIDENCE and stop exploring further. Fabrication of references is forbidden. -> -> Return a structured proposal with exactly these fields: -> - `APPROACH_SUMMARY`: concrete design description (what to build, core mechanism, affected components). -> - `OBJECTIVE_EVIDENCE`: bullet list of repo paths, prior art, or the `exploratory, no concrete precedent` sentinel. -> - `KNOWN_RISKS`: short bullet list. -> - `CONFIDENCE`: one of `high`, `medium`, `low`. - -### Collection and Degradation - -Collect all subagent responses. For each response: -- Parse the four required fields. If a field is missing, mark that proposal as degraded and drop it. -- If fewer than 2 proposals survive, stop with error: `exploration phase degraded; retry.` -- Otherwise continue with the surviving proposals. - -Associate each surviving proposal with its originating direction (so Phase 4 can label it with the original direction name). When numbering alternatives in Phase 4 after any drops, renumber survivors sequentially as Alt-1..Alt-K (where K is the count of surviving non-primary directions). Do not preserve gaps from dropped proposals. - ---- - -## Phase 4: Synthesis and Write - -### Step 4.1: Pick the Primary Direction - -Review all surviving proposals. Choose the strongest as the primary based on: -1. Evidence density — more concrete repo references outranks fewer. -2. Fit with existing repo patterns — extending patterns outranks introducing unfamiliar paradigms. -3. Implementation surface area — prefer smaller surface where quality is otherwise comparable. -4. 
Declared `CONFIDENCE` — `high` > `medium` > `low` as tiebreaker. - -Record the chosen direction as `PRIMARY`; the remaining surviving directions become the Alt-1..Alt-K list (where K is the number of non-primary survivors, K ≤ N-1), numbered sequentially in their original direction order with no gaps for any dropped proposals. - -### Step 4.2: Infer Title - -Generate a 4-10 word Title Case title that captures the primary direction, not the original input phrasing verbatim. Example: idea `add undo/redo` with primary direction `command-pattern history` yields title `Command-Pattern Undo Stack For The Editor`. - -### Step 4.3: Populate the Template - -Read the template file located at `TEMPLATE_FILE` (from Phase 1 stdout). - -Produce the finalized draft content in memory by replacing placeholders: -- `<TITLE>` — the inferred title. -- `<ORIGINAL_IDEA>` — byte-identical contents of `IDEA_BODY_FILE`. Preserve line breaks, trailing newline, and all formatting. Do NOT paraphrase or re-indent. -- `<PRIMARY_NAME>` — primary direction's short name. -- `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). -- `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. -- `<PRIMARY_OBJECTIVE_EVIDENCE>` — primary proposal's `OBJECTIVE_EVIDENCE`, rendered as a bullet list. If the subagent returned only the literal sentinel `exploratory, no concrete precedent`, render it as a single bullet: `- exploratory, no concrete precedent`. -- `<PRIMARY_KNOWN_RISKS>` — primary proposal's `KNOWN_RISKS`, rendered as a bullet list. -- `<ALTERNATIVES>` — for each non-primary survivor at its Alt index `i` (1-based, sequential per Step 4.1), emit: - - ```markdown - ### Alt-<i>: <name> - - Gist: <one-paragraph summary derived from APPROACH_SUMMARY> - - Objective Evidence: - - <bullet from OBJECTIVE_EVIDENCE> - - ... - - Why not primary: <one sentence stating the tradeoff vs PRIMARY> - ``` - - Separate consecutive Alt entries with a single blank line. 
- -- `<SYNTHESIS_NOTES>` — one paragraph describing which elements from the alternatives could fold into the primary if the user chose a different direction. This is the lead's own synthesis note, not a subagent output. - -### Step 4.4: Write the Draft File - -Write the finalized content to `OUTPUT_FILE` using the `Write` tool. Single write; no progressive edits. - -### Step 4.5: Report - -Report to the user: -- Path written (`OUTPUT_FILE`). -- Primary direction name. -- Requested `N` and the actual direction count (note if reduced due to degradation). -- Next-step hint: `To turn this draft into a plan, run: /humanize:gen-plan --input <OUTPUT_FILE> --output <plan-path>`. - ---- - -## Error Handling - -- Phase 1 validation errors stop the command with a clear message. No partial output. -- Phase 2 degradation follows the retry-once + ≥2 minimum rule stated above. -- Phase 3 degradation follows the drop-and-continue + ≥2 minimum rule stated above. -- Never fabricate repo references or prior art. The `exploratory, no concrete precedent` sentinel from subagents is preserved verbatim in the draft. -- If any phase stops with an error, do not write a partial `OUTPUT_FILE`. -```` - -Note on the four-backtick fence (````): the command file itself contains fenced code blocks, so the outer fence in this plan uses four backticks to avoid collision. - -- [ ] **Step 3.2: Verify the command spec parses as markdown** - -Run: -```bash -head -20 commands/gen-idea.md -``` - -Expected: the frontmatter block (lines 1-11) appears first, then the `# Generate Idea Draft from Loose Input` heading. No stray backticks or broken YAML. - -Also check the frontmatter is well-formed: -```bash -awk '/^---$/{n++; if(n==2) exit} n==1' commands/gen-idea.md -``` - -Expected: a list of YAML keys (`description`, `argument-hint`, `allowed-tools`) with valid values.
- -- [ ] **Step 3.3: Commit** - -Run: -```bash -git add commands/gen-idea.md -git commit -m "$(cat <<'EOF' -feat(gen-idea): add slash-command spec for directed-swarm drafting - -Five-phase command: parse, validate via scripts/validate-gen-idea-io.sh, -generate N orthogonal directions grounded in repo context, fan out N -parallel Explore subagents in a single Task-tool message, synthesize -one primary plus N-1 alternatives into the new template, write the -draft. No Codex, no tests, no config-loader integration in this pass. - -Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> -EOF -)" -``` - -Expected: commit succeeds. - ---- - -## Task 4: Update README and Bump Versions - -**Files:** -- Modify: `README.md` -- Modify: `.claude-plugin/plugin.json` -- Modify: `.claude-plugin/marketplace.json` - -- [ ] **Step 4.1: Bump `.claude-plugin/plugin.json`** - -Open the file and change the `version` field from `"1.16.0"` to `"1.16.1"`. The full resulting file: - -```json -{ - "name": "humanize", - "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1", - "author": { - "name": "PolyArch" - }, - "repository": "https://github.com/PolyArch/humanize", - "homepage": "https://github.com/PolyArch/humanize#readme", - "license": "MIT", - "keywords": [ - "iterative-development", - "codex-review", - "ai-native", - "feedback-loop", - "goal-tracking", - "quality-assurance" - ] -} -``` - -Verify: -```bash -grep '"version"' .claude-plugin/plugin.json -``` -Expected: ` "version": "1.16.1",` - -- [ ] **Step 4.2: Bump `.claude-plugin/marketplace.json`** - -Change `plugins[0].version` from `"1.16.0"` to `"1.16.1"`. 
Full resulting file: - -```json -{ - "name": "PolyArch", - "owner": { - "name": "PolyArch" - }, - "plugins": [ - { - "name": "humanize", - "source": "./", - "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1" - } - ] -} -``` - -Verify: -```bash -grep '"version"' .claude-plugin/marketplace.json -``` -Expected: ` "version": "1.16.1"` - -- [ ] **Step 4.3: Update `README.md`** - -Make two edits: - -**Edit 1** — bump the version header. Change line: -``` -**Current Version: 1.16.0** -``` -to: -``` -**Current Version: 1.16.1** -``` - -**Edit 2** — insert a new Quick Start step for `gen-idea` BEFORE the existing `gen-plan` step. - -Locate the existing Quick Start section. It currently begins: -``` -## Quick Start - -1. **Generate a plan** from your draft: - ```bash - /humanize:gen-plan --input draft.md --output docs/plan.md - ``` -``` - -Replace that block with: -``` -## Quick Start - -1. **Generate an idea draft** from a loose thought (optional — skip if you already have a draft): - ```bash - /humanize:gen-idea "add undo/redo to the editor" - ``` - Output goes to `.humanize/ideas/<slug>-<timestamp>.md` by default. Pass a `.md` path to expand existing rough notes. `--n` controls how many parallel directions explore the idea (default 6). - -2. **Generate a plan** from your draft: - ```bash - /humanize:gen-plan --input draft.md --output docs/plan.md - ``` -``` - -Renumber the remaining Quick Start items in that section accordingly (the existing `refine-plan`, `start-rlcr-loop`, `gemini` steps shift down by one). - -Verify: -```bash -grep -n "Current Version" README.md -grep -n "gen-idea" README.md -``` -Expected: the version line now reads `1.16.1`; the `gen-idea` mention appears in Quick Start. 
- -- [ ] **Step 4.4: Run a quick project rule sanity check** - -Run: -```bash -grep '"version"' .claude-plugin/plugin.json .claude-plugin/marketplace.json -grep 'Current Version' README.md -``` - -All three must show `1.16.1`. The three-file version sync is a project-level rule enforced by CI; mismatches will block the PR. - -- [ ] **Step 4.5: Commit** - -Run: -```bash -git add README.md .claude-plugin/plugin.json .claude-plugin/marketplace.json -git commit -m "$(cat <<'EOF' -feat(gen-idea): document command in README, bump to 1.16.1 - -Add a Quick Start entry for /humanize:gen-idea above the existing -gen-plan step and sync version across plugin.json, marketplace.json, -and the README header. - -Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> -EOF -)" -``` - -Expected: commit succeeds; `git log --oneline -5` shows four feat commits for gen-idea (template, script, command, docs). - ---- - -## Task 5: End-to-End Smoke Check - -**Files:** no new files. This task verifies the full command integrates and produces a valid draft. - -This task requires the validation script and command spec from Tasks 2–3 to be present and committed. It does NOT invoke Claude via a nested agent — it walks the user through a manual smoke test they (or an implementing engineer) run interactively in Claude Code to confirm the command wires up end-to-end. - -- [ ] **Step 5.1: Invoke the command with an inline idea** - -In a Claude Code session with the plugin installed from this branch, run: -``` -/humanize:gen-idea "add bulk-tag operations to the note editor" --n 3 -``` - -Expected: -- Claude reports running `validate-gen-idea-io.sh` and parses `INPUT_MODE: inline`. -- Claude produces exactly 3 orthogonal directions (or emits the documented warning and proceeds with ≥2). -- Claude dispatches Explore subagents in parallel. -- Claude writes a draft to `.humanize/ideas/add-bulk-tag-operations-to-the-note-editor-<timestamp>.md`. 
-- Final report names the primary direction and the suggested `gen-plan` follow-up command. - -- [ ] **Step 5.2: Inspect the generated draft** - -Run: -```bash -ls -la .humanize/ideas/ -cat .humanize/ideas/add-bulk-tag-operations-to-the-note-editor-*.md -``` - -Expected: exactly one new file. The content has these sections in order: -- `# <Title>` -- `## Original Idea` — contains the exact inline text `add bulk-tag operations to the note editor` (verbatim). -- `## Primary Direction: <Name>` with Rationale, Approach Summary, Objective Evidence, Known Risks subsections populated. -- `## Alternative Directions Considered` with `### Alt-1: ...` and `### Alt-2: ...` subsections (since N=3 → 1 primary + 2 alts). -- `## Synthesis Notes` — one paragraph. - -No placeholder strings like `<TITLE>` or `<PRIMARY_NAME>` remain in the output. - -- [ ] **Step 5.3: Chain into gen-plan to confirm downstream compatibility** - -Run: -``` -/humanize:gen-plan --input .humanize/ideas/add-bulk-tag-operations-to-the-note-editor-<timestamp>.md --output /tmp/gen-idea-smoke-plan.md --direct -``` - -(Use `--direct` to skip the convergence loop for this smoke check; the goal is only to confirm the draft passes `gen-plan` Phase 2 relevance and produces a plan skeleton.) - -Expected: -- gen-plan's relevance check passes. -- A plan file appears at `/tmp/gen-idea-smoke-plan.md` with the standard gen-plan structure (Goal Description, Acceptance Criteria, Path Boundaries, etc.). - -- [ ] **Step 5.4: Cleanup and record findings** - -Run: -```bash -rm -rf .humanize/ideas -rm -f /tmp/gen-idea-smoke-plan.md -``` - -If any step produced unexpected output (wrong exit code, missing section, fabricated evidence), add a follow-up task noting what to fix and which file to edit. Otherwise this plan is complete. 
- -- [ ] **Step 5.5: Final push** - -Once Tasks 1–4 are committed and Task 5 smoke-passes, push the branch: -```bash -git push -u origin add-gen-idea-command -``` - -Open a PR targeting `dev` (not `main`) — the branch was cut from `origin/dev` and is intended for the `1.16.x` release train. The project's CI check enforces that `feat:` branches target `dev`, not `main`. - ---- - -## Self-Review Notes (Completed During Plan Authoring) - -- **Spec coverage**: every Out-of-Scope-first-pass item in the spec is reflected here as "not done" (no Codex tasks, no config-loader tasks, no test harness). Every in-scope file listed in the spec has a dedicated task. -- **Placeholder scan**: no TBD/TODO/"handle errors appropriately" remain. Every code block shows exact content. -- **Type consistency**: `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N`, `INPUT_MODE` names match across the validation script, the command spec, and the smoke-check expectations. Exit codes are consistent between the script definition, the command error-handling table, and the smoke-test expected values. -- **Scope**: single subsystem (one command + template + script + doc bump). No decomposition needed. diff --git a/docs/superpowers/specs/2026-04-20-gen-idea-design.md b/docs/superpowers/specs/2026-04-20-gen-idea-design.md deleted file mode 100644 index b2e69e10..00000000 --- a/docs/superpowers/specs/2026-04-20-gen-idea-design.md +++ /dev/null @@ -1,210 +0,0 @@ -# gen-idea — Directed-Swarm Idea Drafting (Design) - -## Context - -Humanize today starts at `gen-plan`, which takes a user-authored draft `.md` and produces a structured plan via a single Codex first-pass + a Claude/Codex convergence loop. The draft itself — the "most valuable human input" that `gen-plan` preserves verbatim — still has to be hand-authored. - -This spec introduces a new command `/humanize:gen-idea` that sits one step earlier in the flow. 
It takes a loose idea (inline text or a `.md` of notes) and produces a repo-grounded draft suitable as `gen-plan`'s `--input`. - -The command borrows its core mechanic from the Anthropic alignment note *Automated W2S Researcher* (2026). That work showed that when nine agents received **different high-level directions** ("study data filtering", "study distillation", "study evolutionary search") they decisively outperformed nine agents given the same task description — directed diversity climbs faster than undirected replication. `gen-idea` applies the same insight to idea generation: instead of one LLM pass, a lead picks N orthogonal directions and delegates one direction per subagent, then synthesizes. - -## Goal - -One shipable command, `/humanize:gen-idea`, that: - -- Accepts a loose idea (inline text or `.md` path) and a desired direction count. -- Spawns N parallel read-only exploration subagents, each assigned a distinct direction. -- Writes a single draft `.md` that chooses one primary direction, lists alternatives, and grounds each in objective repo evidence. -- Produces output that passes through `gen-plan --input <draft>` unchanged. - -No Codex, no RLCR, no auto-chaining, no relevance check, no config-loader integration. Lightweight first pass — everything downstream already exists. - -## Out of Scope (First Pass) - -- Codex involvement in idea phase (delegated to downstream `gen-plan`). -- Relevance check against repo (delegated to `gen-plan` Phase 2). -- Config-loader integration (`.humanize/config.json` not read). -- Alternative-language translation variant. -- Auto-chain to `gen-plan`. -- Test harness, CI coverage, or telemetry. -- `--directions` override flag (directions are LLM-picked per topic). - -Each of these may land in a follow-up once the primary flow is proven. 
- -## Command Signature - -``` -/humanize:gen-idea <idea-text-or-path> [--n 6] [--output <path>] -``` - -**Input auto-detection**: if the positional arg resolves to an existing file AND ends in `.md`, it is read as file content; otherwise the arg is treated verbatim as inline idea text. - -**Parameters** - -- Positional (required): idea body as inline text or path to a `.md` file. Must be non-empty after parsing. -- `--n <int>` (optional, default `6`): direction count. Valid range `[2, 10]`. Out-of-range stops the command. -- `--output <path>` (optional): target draft path. Default `.humanize/ideas/<slug>-<YYYYMMDD-HHMMSS>.md` relative to project root. - -**Slug construction** - -- File input → filename stem (extension removed). -- Inline input → lowercase first ~40 chars of the idea, strip non-alphanumeric (keep `-`), collapse dash runs, trim leading/trailing dashes. -- Empty result → fallback `idea`. - -**Path behavior** - -- For the **default** `--output`, the command auto-creates `.humanize/ideas/` if missing. This matches the implicit contract that Humanize owns its own `.humanize/` subtree. -- For a **user-supplied** `--output`, the parent directory must already exist. This mirrors `gen-plan`'s stance and avoids silently creating arbitrary directories. -- Output file must not already exist. Refuse to overwrite. - -## Architecture - -Five phases inside a single command file `commands/gen-idea.md`, strictly sequential: - -### Phase 0 — Parse Input - -Parse `$ARGUMENTS`. Set `IDEA_INPUT`, `N` (default 6), `OUTPUT_FILE` (default path if unset). - -### Phase 1 — IO Validation - -Call `scripts/validate-gen-idea-io.sh` with the resolved flags. The script: - -- Distinguishes inline vs file input. -- Writes inline text to a tempfile under `$TMPDIR` and prints its path (so downstream phases always consume a file, simplifying the command body). -- Verifies `--n` is an integer in `[2, 10]`. 
-- Creates the default output directory when the default path is used; rejects non-existent parent directory for user-supplied paths. -- Refuses to overwrite an existing output file. -- Locates `prompt-template/idea/gen-idea-template.md`. - -Exit codes parallel `validate-gen-plan-io.sh` (distinct failures get distinct codes; `6` is "invalid arguments"; `7` is "template missing"). - -Script stdout contains `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` for the command to consume. - -### Phase 2 — Direction Generation - -One Claude pass. Inputs: the idea body, the repo README, the project `CLAUDE.md` (if any), and a top-level directory listing. Output: exactly `N` orthogonal directions, each with: - -- A short **name** (2–5 words). -- A **one-sentence rationale** explaining *why this angle is distinct from the others*. - -Orthogonality is the hard constraint — two near-duplicate directions defeat the W2S premise. The generation prompt names this explicitly and requires the model to flag and replace any near-duplicates before returning. - -### Phase 3 — Parallel Exploration - -Single Task-tool invocation block with N parallel `Explore` subagents. Each subagent receives: - -- The verbatim idea body. -- Its single assigned direction (name + rationale). -- Instruction to produce a structured mini-proposal with **objective evidence** — references to specific repo paths, existing patterns worth extending, measurable considerations (rough complexity, LOC surface, perf implications) where discoverable. Read-only; no writes. -- Explicit instruction to report "exploratory, no concrete precedent" verbatim if no evidence is found. Fabrication is forbidden. - -Each subagent returns a proposal block with fields: `APPROACH_SUMMARY`, `OBJECTIVE_EVIDENCE` (bullet list), `KNOWN_RISKS`, `CONFIDENCE` (`high` / `medium` / `low`). - -### Phase 4 — Synthesis & Write - -The Lead (main command body, same model context) reviews all returned proposals and: - -1. 
Picks the strongest direction as **primary**, factoring in: evidence density, fit with repo patterns, implementation surface area, and declared confidence. -2. Populates the template in this order: inferred title → `Original Idea` (verbatim copy of the idea body) → `Primary Direction` section (filled from the chosen proposal) → `Alternative Directions Considered` (each remaining direction in Alt-1..Alt-(N-1) order, with "Why not primary" line) → `Synthesis Notes` (which alt elements could fold into primary). -3. Writes the finalized draft to `OUTPUT_FILE` via `Write`. -4. Reports path + one-line summary to the user. - -## Draft Output Format - -Rendered from `prompt-template/idea/gen-idea-template.md`: - -```markdown -# <Inferred Title> - -## Original Idea -<Verbatim — never paraphrased> - -## Primary Direction: <Name> - -### Rationale -<Why strongest given repo context and evidence.> - -### Approach Summary -<Concrete design: what to build, core mechanism, affected components.> - -### Objective Evidence -- <Code reference: path/to/file — existing pattern we extend> -- <Prior art / precedent> -- <Measurable consideration where available> - -### Known Risks -<Short honest list of what could go wrong.> - -## Alternative Directions Considered - -### Alt-1: <Name> -- Gist: <one-paragraph summary> -- Objective Evidence: - - <bullet> -- Why not primary: <short reason> - -### Alt-2 ... Alt-(N-1) -<Same shape.> - -## Synthesis Notes -<Which elements from alternatives could fold into the primary if the user picks an alt.> -``` - -Two invariants: - -1. `Original Idea` is byte-identical to the user's input. Mirrors `gen-plan`'s "draft is the most valuable human input" principle. -2. The draft is a complete, self-contained design — not a set of open questions. This is what lets it pass `gen-plan` Phase 2 and feed Phase 3 meaningfully. 
- -## Agent Topology - -``` -user idea (inline | file) - | - v -[Phase 2: Lead — generate N orthogonal directions] - | - +--> [Explore #1, direction A] --+ - +--> [Explore #2, direction B] --| - +--> [Explore #3, direction C] --|--> [Phase 4: Lead — synthesize] - +--> ... --| | - +--> [Explore #N, direction N] --+ v - draft.md -``` - -All parallel subagents are `Explore` (read-only). No new subagent type is introduced. - -## Error Handling - -- **Direction generation returns fewer than N**: retry the Phase 2 call once, asking for exactly N orthogonal directions. After the retry, if at least 2 directions are returned, proceed with the reduced count and log a warning; with fewer than 2, stop. -- **One Explore subagent fails**: drop it and continue synthesis with the rest. With fewer than 2 successful proposals, stop with error `exploration phase degraded; retry`. -- **No objective evidence for a direction**: subagent reports `exploratory, no concrete precedent`; that text is preserved verbatim in the draft. Never fabricate references. -- **Inline idea shorter than 10 characters**: warn and proceed (user's call — some valid ideas are terse). -- **Input file unreadable / not `.md`**: IO validation exits with distinct error code, parallel to `validate-gen-plan-io.sh` semantics. - -## Files to Add or Modify - -1. `commands/gen-idea.md` — new command spec (estimated ~150–200 lines, structured like a lean subset of `gen-plan.md`). -2. `prompt-template/idea/gen-idea-template.md` — new template file matching the format above. -3. `scripts/validate-gen-idea-io.sh` — new IO validation + slug resolution script, modeled on `validate-gen-plan-io.sh`. -4. `README.md` — add a one-line Quick Start entry for `gen-idea` above the `gen-plan` step; bump `Current Version` to `1.16.1`. -5. `.claude-plugin/plugin.json` — bump `version` to `1.16.1`. -6. `.claude-plugin/marketplace.json` — bump `version` to `1.16.1` (three-file version sync is a project-level rule). 
- -No test harness is added in this first pass. - -## Acceptance (Smoke-Level) - -- `/humanize:gen-idea "add undo/redo to the editor"` writes a `.md` under `.humanize/ideas/` with all required sections populated — one primary direction plus five alternatives (`N=6` total). -- `/humanize:gen-idea notes/rough.md --n 3 --output tmp/draft.md` reads the file, writes to `tmp/draft.md`, with exactly one primary and two alternatives. -- `/humanize:gen-idea ""` stops with a clear "missing idea" error. -- `/humanize:gen-idea "x" --n 1` stops with an out-of-range error. -- `/humanize:gen-idea "x" --output <existing-file>` refuses to overwrite. -- The resulting draft fed into `/humanize:gen-plan --input <draft> --output plan.md` passes Phase 2 relevance check and produces a structured plan without human edits. - -## Future Extensions (Out of Scope Here) - -- Codex pass over the synthesized draft for independent sanity check. -- Optional `--chain-to-gen-plan` flag that invokes `gen-plan` on the written draft. -- `--directions "..."` override for users who want to pin angles. -- Config-loader integration for alternative-language draft variants. -- Relevance check mirroring `gen-plan` Phase 2 to fail early when an idea is clearly unrelated to the repo. From 0e6a5b3f6a66c3cadcac70ae57c7e40c4084c409 Mon Sep 17 00:00:00 2001 From: Sihao Liu <sihao@cs.ucla.edu> Date: Mon, 20 Apr 2026 16:48:49 -0700 Subject: [PATCH 94/97] fix(gen-idea): treat whitespace-containing .md args as inline An argument ending in .md was unconditionally classified as a path, so inline ideas that mention a markdown filename (for example "rename README.md") failed validation with INPUT_NOT_FOUND instead of being accepted as inline text. Gate the .md heuristic on the same "no whitespace" rule the "/" branch already applied. Ideas with any whitespace fall through to inline mode, while clean strings like "./idea.md" or "path/to/idea.md" still surface a helpful INPUT_NOT_FOUND error when the file is absent. 
--- scripts/validate-gen-idea-io.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index cc106f87..ef142fb6 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -86,17 +86,18 @@ INPUT_MODE="" IDEA_BODY_FILE="" SLUG="" -# Detect whether IDEA_INPUT is meant as a file path. The `-f` test on line -# below is the primary gate; this heuristic only matters when that test fails -# and we must decide whether to emit INPUT_NOT_FOUND (user meant a path) or -# treat the text as inline. Limitation: a path that contains spaces AND does -# not exist falls through to inline mode silently, because the space rule is -# how we avoid misclassifying inline ideas that happen to contain "/". +# Detect whether IDEA_INPUT is meant as a file path. The `-f` test below is +# the primary gate; this heuristic only matters when that test fails and we +# must decide whether to emit INPUT_NOT_FOUND (user meant a path) or treat +# the text as inline. Any whitespace disqualifies the input from path mode, +# so inline ideas that happen to mention a filename like "rename README.md" +# or that contain "/" fall through to inline. Limitation: a real path that +# contains whitespace and does not exist is silently treated as inline. 
looks_like_path=false -if [[ "$IDEA_INPUT" == *.md ]]; then - looks_like_path=true -elif [[ "$IDEA_INPUT" == */* && "$IDEA_INPUT" != *" "* ]]; then - looks_like_path=true +if [[ "$IDEA_INPUT" != *[[:space:]]* ]]; then + if [[ "$IDEA_INPUT" == *.md || "$IDEA_INPUT" == */* ]]; then + looks_like_path=true + fi fi if [[ -f "$IDEA_INPUT" ]]; then From 04116b90ce15a256459ab7384a0066358a3d7932 Mon Sep 17 00:00:00 2001 From: Sihao Liu <sihao@cs.ucla.edu> Date: Mon, 20 Apr 2026 16:49:41 -0700 Subject: [PATCH 95/97] fix(gen-idea): reject unreadable markdown input files The file-input branch validated existence, .md extension, and non-empty content but never checked readability. An unreadable regular .md file passed validation and then triggered a less actionable runtime failure when the command itself tried to read IDEA_BODY_FILE. Insert an -r readability check between the extension and emptiness checks. Emit VALIDATION_ERROR: INPUT_NOT_READABLE with exit 2, aligning with the advertised exit-code path ("missing, not readable, or not .md") in the command spec. --- scripts/validate-gen-idea-io.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index ef142fb6..297b4df5 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -4,7 +4,7 @@ # Exit codes: # 0 - Success # 1 - Missing idea input or empty input file -# 2 - Input looks like a path but is unreadable, not .md, or does not exist +# 2 - Input looks like a path but is not readable, not .md, or does not exist # 3 - Output parent directory does not exist (user-supplied path only) # 4 - Output file already exists # 5 - No write permission to output directory @@ -106,6 +106,11 @@ if [[ -f "$IDEA_INPUT" ]]; then echo "File input must have .md extension; got: $IDEA_INPUT" exit 2 fi + if [[ ! 
-r "$IDEA_INPUT" ]]; then + echo "VALIDATION_ERROR: INPUT_NOT_READABLE" + echo "Input file is not readable: $IDEA_INPUT" + exit 2 + fi if [[ ! -s "$IDEA_INPUT" ]]; then echo "VALIDATION_ERROR: INPUT_EMPTY" echo "Input file is empty: $IDEA_INPUT" From 3e891f329b7dde8d7cd432575a89f796fb48654f Mon Sep 17 00:00:00 2001 From: Sihao Liu <sihao@cs.ucla.edu> Date: Mon, 20 Apr 2026 17:12:30 -0700 Subject: [PATCH 96/97] fix(gen-idea): emit inline idea body via stdout sentinel Inline mode created a per-run mktemp file under TMPDIR and deliberately left it in place so the caller could read it after the script exited. On long-lived developer machines or CI workers this produced unbounded tempfile growth and retained user-provided idea text on disk beyond command completion. Eliminate the tempfile entirely. The script now emits the inline idea body on stdout between the literal sentinel lines "=== IDEA_BODY_BEGIN ===" and "=== IDEA_BODY_END ===". The caller extracts the body from that block when INPUT_MODE is inline, and still reads IDEA_BODY_FILE from disk in file mode. IDEA_BODY_FILE is now only emitted in file mode; stdout contract for inline mode gains the sentinel block but drops the on-disk handoff. Update commands/gen-idea.md to document the new mode-aware body retrieval and to reference the in-memory IDEA_BODY value in the Phase 3 subagent prompt and Phase 4 template placeholder. --- commands/gen-idea.md | 12 ++++++++---- scripts/validate-gen-idea-io.sh | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/commands/gen-idea.md b/commands/gen-idea.md index 2897a818..2ef61e82 100644 --- a/commands/gen-idea.md +++ b/commands/gen-idea.md @@ -51,7 +51,7 @@ Run: ``` Handle exit codes: -- `0`: Parse stdout to extract `INPUT_MODE`, `IDEA_BODY_FILE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). Continue to Phase 2. 
(`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) +- `0`: Parse stdout to extract `INPUT_MODE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). When `INPUT_MODE` is `file`, stdout additionally contains an `IDEA_BODY_FILE: <path>` line; extract that too. Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) - `1`: Report "Missing or empty idea input" and stop. - `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. - `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. @@ -62,7 +62,11 @@ Handle exit codes: Before `VALIDATION_SUCCESS`, stdout may contain one or more lines starting with `WARNING:` (for example, `WARNING: short idea (<N> chars); proceeding` when an inline idea is under 10 characters). Surface these warnings to the user in your final report but continue Phase 2 normally. `WARNING:` lines are informational, not errors. -Read the full contents of `IDEA_BODY_FILE` using the `Read` tool. Preserve byte-identical content in memory for later phases. +Obtain the idea body into memory as `IDEA_BODY`, based on `INPUT_MODE`: +- `inline`: stdout contains a sentinel block at the end of the success output; extract all text between the `=== IDEA_BODY_BEGIN ===` and `=== IDEA_BODY_END ===` lines (exclusive). The script emits a trailing newline after the last body line. +- `file`: read the full contents of `IDEA_BODY_FILE` using the `Read` tool. + +Preserve byte-identical content in memory for later phases. No on-disk tempfile is created in inline mode — the stdout sentinel block is the authoritative source. 
--- @@ -109,7 +113,7 @@ Dispatch all directions in a **single Task-tool message** containing one Task in For each direction in `DIRECTIONS`, launch one `Explore` subagent. Each invocation prompt MUST include: -1. A verbatim copy of the idea body loaded from `IDEA_BODY_FILE` in Phase 1. +1. A verbatim copy of the idea body (`IDEA_BODY`) captured in Phase 1. 2. The assigned direction (name + rationale). 3. The following instruction block (reproduce verbatim in the subagent prompt): @@ -161,7 +165,7 @@ Read the template file located at `TEMPLATE_FILE` (from Phase 1 stdout). Produce the finalized draft content in memory by replacing placeholders: - `<TITLE>` — the inferred title. -- `<ORIGINAL_IDEA>` — byte-identical contents of `IDEA_BODY_FILE`. Preserve line breaks, trailing newline, and all formatting. Do NOT paraphrase or re-indent. +- `<ORIGINAL_IDEA>` — byte-identical value of `IDEA_BODY` captured in Phase 1. Preserve line breaks, trailing newline, and all formatting. Do NOT paraphrase or re-indent. - `<PRIMARY_NAME>` — primary direction's short name. - `<PRIMARY_RATIONALE>` — primary direction's rationale (from Phase 2). - `<PRIMARY_APPROACH_SUMMARY>` — primary proposal's `APPROACH_SUMMARY`. diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index 297b4df5..99c4bb1a 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -125,14 +125,10 @@ elif [[ "$looks_like_path" == true ]]; then echo "Looks like a file path but does not exist: $IDEA_INPUT" exit 2 else + # Inline mode emits the idea body on stdout inside a sentinel block, + # so the caller does not need to consume an on-disk tempfile. This + # avoids leaking user-provided text under $TMPDIR on repeated runs. INPUT_MODE="inline" - # Deliberately no `trap ... EXIT` to remove TMPFILE: the caller consumes - # IDEA_BODY_FILE after this script exits, so a naive trap would delete - # the file the caller needs. 
On error paths the tempfile is leaked in - # $TMPDIR; the OS sweeps $TMPDIR on reboot and the cost is negligible. - TMPFILE="$(mktemp "${TMPDIR:-/tmp}/gen-idea-inline-XXXXXX")" - printf '%s\n' "$IDEA_INPUT" > "$TMPFILE" - IDEA_BODY_FILE="$TMPFILE" if (( ${#IDEA_INPUT} < 10 )); then echo "WARNING: short idea (${#IDEA_INPUT} chars); proceeding" fi @@ -192,9 +188,16 @@ fi echo "VALIDATION_SUCCESS" echo "INPUT_MODE: $INPUT_MODE" -echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" +if [[ "$INPUT_MODE" == "file" ]]; then + echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" +fi echo "OUTPUT_FILE: $OUTPUT_FILE" echo "SLUG: $SLUG" echo "TEMPLATE_FILE: $TEMPLATE_FILE" echo "N: $N" +if [[ "$INPUT_MODE" == "inline" ]]; then + echo "=== IDEA_BODY_BEGIN ===" + printf '%s\n' "$IDEA_INPUT" + echo "=== IDEA_BODY_END ===" +fi exit 0 From 3c7fa7d57ff03d4866c55c79f75a6fce9b8271a7 Mon Sep 17 00:00:00 2001 From: Sihao Liu <sihao@cs.ucla.edu> Date: Sat, 25 Apr 2026 07:36:40 -0700 Subject: [PATCH 97/97] chore: bump default Codex model from gpt-5.4 to gpt-5.5 --- config/default_config.json | 2 +- docs/install-for-codex.md | 4 +-- docs/install-for-kimi.md | 4 +-- docs/usage.md | 10 +++--- hooks/lib/loop-common.sh | 6 ++-- scripts/ask-codex.sh | 4 +-- scripts/install-skill.sh | 2 +- skills/ask-codex/SKILL.md | 4 +-- skills/humanize-rlcr/SKILL.md | 4 +-- skills/humanize/SKILL.md | 4 +-- .../robustness/test-state-file-robustness.sh | 10 +++--- tests/test-agent-teams.sh | 2 +- tests/test-bitlesson-select-routing.sh | 6 ++-- tests/test-codex-hook-install.sh | 4 +-- tests/test-disable-nested-codex-hooks.sh | 2 +- tests/test-finalize-phase.sh | 2 +- tests/test-plan-file-validation.sh | 4 +-- tests/test-session-id.sh | 2 +- tests/test-skill-monitor.sh | 32 ++++++++--------- tests/test-stop-gate.sh | 2 +- tests/test-stop-hook-bg-allow.sh | 26 +++++++------- tests/test-stop-hook-legacy-compat.sh | 2 +- tests/test-unified-codex-config.sh | 34 +++++++++---------- 23 files changed, 86 insertions(+), 86 deletions(-) diff --git 
a/config/default_config.json b/config/default_config.json index 6238dce2..af41af12 100644 --- a/config/default_config.json +++ b/config/default_config.json @@ -1,5 +1,5 @@ { - "codex_model": "gpt-5.4", + "codex_model": "gpt-5.5", "codex_effort": "high", "bitlesson_model": "haiku", "agent_teams": false, diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index 5d34b24c..2c70a1cc 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -29,7 +29,7 @@ This will: - Enable the experimental `codex_hooks` feature in `${CODEX_HOME:-~/.codex}/config.toml` when `codex` is available - Seed `~/.config/humanize/config.json` with a Codex/OpenAI `bitlesson_model` when that key is not already set - Mark the install as `provider_mode: "codex-only"` when using `--target codex` -- Use RLCR defaults: `codex exec` with `gpt-5.4:high`, `codex review` with `gpt-5.4:high` +- Use RLCR defaults: `codex exec` with `gpt-5.5:high`, `codex review` with `gpt-5.5:high` Requires Codex CLI `0.114.0` or newer for native hooks. Older Codex builds are not supported by the Codex install path. @@ -77,7 +77,7 @@ sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" Expected: - `codex_hooks` is `true` - `hooks.json` contains `loop-codex-stop-hook.sh` -- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.4` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.5` - for `--target codex`, `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` also contains `provider_mode: "codex-only"` ## Optional: Install for Both Codex and Kimi diff --git a/docs/install-for-kimi.md b/docs/install-for-kimi.md index ee217d70..c947ffac 100644 --- a/docs/install-for-kimi.md +++ b/docs/install-for-kimi.md @@ -169,7 +169,7 @@ Look for the "Skills" section in the help output. 
|--------|-------------|---------| | `path/to/plan.md` | Plan file path | Required (unless --skip-impl) | | `--max N` | Maximum iterations | 42 | -| `--codex-model MODEL:EFFORT` | Codex model | gpt-5.4:high | +| `--codex-model MODEL:EFFORT` | Codex model | gpt-5.5:high | | `--codex-timeout SECONDS` | Review timeout | 5400 | | `--base-branch BRANCH` | Base for code review | auto-detect | | `--full-review-round N` | Full alignment check interval | 5 | @@ -191,7 +191,7 @@ Ensure you have `codex` CLI installed: codex --version ``` -The skills will use `gpt-5.4` with `high` effort level by default. +The skills will use `gpt-5.5` with `high` effort level by default. ## Uninstall diff --git a/docs/usage.md b/docs/usage.md index 4234b39d..313ae7ce 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -76,7 +76,7 @@ OPTIONS: --plan-file <path> Explicit plan file path (alternative to positional arg) --max <N> Maximum iterations before auto-stop (default: 42) --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, fallback gpt-5.4:high) + Codex model and reasoning effort (default from config, fallback gpt-5.5:high) --codex-timeout <SECONDS> Timeout for each Codex review in seconds (default: 5400) --track-plan-file Indicate plan file should be tracked in git (must be clean) @@ -240,7 +240,7 @@ translated plan and QA variants by inserting `_<code>` before the file extension OPTIONS: --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, fallback gpt-5.4:high) + Codex model and reasoning effort (default from config, fallback gpt-5.5:high) --codex-timeout <SECONDS> Timeout for the Codex query in seconds (default: 3600) -h, --help Show help message @@ -265,7 +265,7 @@ Current built-in keys: | Key | Default | Description | |-----|---------|-------------| -| `codex_model` | `gpt-5.4` | Shared default model for Codex-backed review and analysis | +| `codex_model` | `gpt-5.5` | Shared default model for Codex-backed review and 
analysis | | `codex_effort` | `high` | Shared default reasoning effort (`xhigh`, `high`, `medium`, `low`) | | `bitlesson_model` | `haiku` | Model used by the BitLesson selector agent | | `provider_mode` | unset | Optional runtime mode hint such as `codex-only` | @@ -279,7 +279,7 @@ All Codex-using features (RLCR loop, ask-codex) share the same model configurati | Key | Default | Description | |-----|---------|-------------| -| `codex_model` | `gpt-5.4` | Model used for Codex operations (reviews, analysis, queries) | +| `codex_model` | `gpt-5.5` | Model used for Codex operations (reviews, analysis, queries) | | `codex_effort` | `high` | Reasoning effort (`xhigh`, `high`, `medium`, `low`) | To override, add to `.humanize/config.json`: @@ -300,7 +300,7 @@ Codex model is resolved with this precedence: 1. CLI `--codex-model` flag (highest priority) 2. Feature-specific defaults 3. Config-backed defaults from the 4-layer hierarchy above -4. Hardcoded fallback (`gpt-5.4:high`) +4. Hardcoded fallback (`gpt-5.5:high`) **Migration note**: If your `.humanize/config.json` contains the legacy keys `loop_reviewer_model` or `loop_reviewer_effort`, they are silently ignored. diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 3e54e6ef..5726b23b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -54,7 +54,7 @@ readonly DRIFT_STATUS_REPLAN_REQUIRED="replan_required" # Default Codex configuration (single source of truth - all scripts reference this) # Scripts can pre-set DEFAULT_CODEX_MODEL/DEFAULT_CODEX_EFFORT before sourcing to override. # Config-backed defaults are loaded from the merge hierarchy after config-loader.sh is sourced. -# Precedence: pre-set value > config value > hardcoded fallback (gpt-5.4/high) +# Precedence: pre-set value > config value > hardcoded fallback (gpt-5.5/high) # # The actual assignment happens in the "Config-backed defaults" section below, # after config-loader.sh has been sourced and merged config is available. 
@@ -207,7 +207,7 @@ DEFAULT_BITLESSON_MODEL="${DEFAULT_BITLESSON_MODEL:-haiku}" # Load codex model/effort from merged config so .humanize/config.json can set persistent # defaults for all Codex-using features (RLCR, ask-codex). -# Precedence: pre-set by caller > config value > hardcoded fallback (gpt-5.4/high) +# Precedence: pre-set by caller > config value > hardcoded fallback (gpt-5.5/high) _cfg_codex_model="$(get_config_value "$_LOOP_COMMON_CONFIG" "codex_model" 2>/dev/null || true)" if [[ -n "$_cfg_codex_model" && ! "$_cfg_codex_model" =~ ^[a-zA-Z0-9._-]+$ ]]; then echo "Warning: Invalid codex_model in merged config: $_cfg_codex_model" >&2 @@ -219,7 +219,7 @@ elif [[ -n "$_cfg_codex_model" && ! "$_cfg_codex_model" =~ ^(gpt-|o[0-9]) ]]; th echo " Ignoring configured codex_model; using caller preset or fallback" >&2 _cfg_codex_model="" fi -DEFAULT_CODEX_MODEL="${DEFAULT_CODEX_MODEL:-${_cfg_codex_model:-gpt-5.4}}" +DEFAULT_CODEX_MODEL="${DEFAULT_CODEX_MODEL:-${_cfg_codex_model:-gpt-5.5}}" _cfg_codex_effort="$(get_config_value "$_LOOP_COMMON_CONFIG" "codex_effort" 2>/dev/null || true)" if [[ -n "$_cfg_codex_effort" && ! "$_cfg_codex_effort" =~ ^(xhigh|high|medium|low)$ ]]; then echo "Warning: Invalid codex_effort in merged config: $_cfg_codex_effort" >&2 diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index 47ffeab0..fee439a8 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -55,7 +55,7 @@ USAGE: OPTIONS: --codex-model <MODEL:EFFORT> - Codex model and reasoning effort (default from config, fallback gpt-5.4:high) + Codex model and reasoning effort (default from config, fallback gpt-5.5:high) --codex-timeout <SECONDS> Timeout for the Codex query in seconds (default: 3600) -h, --help Show this help message @@ -68,7 +68,7 @@ DESCRIPTION: EXAMPLES: /humanize:ask-codex How should I structure the authentication module? - /humanize:ask-codex --codex-model gpt-5.4:high What are the performance bottlenecks? 
+ /humanize:ask-codex --codex-model gpt-5.5:high What are the performance bottlenecks? /humanize:ask-codex --codex-timeout 300 Review the error handling in src/api/ ENVIRONMENT: diff --git a/scripts/install-skill.sh b/scripts/install-skill.sh index fa546618..3106201d 100755 --- a/scripts/install-skill.sh +++ b/scripts/install-skill.sh @@ -296,7 +296,7 @@ user_config = pathlib.Path(sys.argv[2]) install_target = sys.argv[3] defaults = json.loads(default_config.read_text(encoding="utf-8")) -default_codex_model = defaults.get("codex_model") or "gpt-5.4" +default_codex_model = defaults.get("codex_model") or "gpt-5.5" if user_config.exists(): try: diff --git a/skills/ask-codex/SKILL.md b/skills/ask-codex/SKILL.md index 486e8fcb..a09a6b0b 100644 --- a/skills/ask-codex/SKILL.md +++ b/skills/ask-codex/SKILL.md @@ -24,7 +24,7 @@ If the user supplied flags such as `--codex-model` or `--codex-timeout`, reconst Example: ```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" --codex-model gpt-5.4:high "Review the following round summary (M4)..." +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" --codex-model gpt-5.5:high "Review the following round summary (M4)..." 
``` Never run this unsafe form: @@ -53,4 +53,4 @@ because the shell will re-parse the question text and can fail before `ask-codex ## Notes - The response is saved to `.humanize/skill/<timestamp>/output.md` for reference -- Default model is `gpt-5.4:high` with a 3600-second timeout +- Default model is `gpt-5.5:high` with a 3600-second timeout diff --git a/skills/humanize-rlcr/SKILL.md b/skills/humanize-rlcr/SKILL.md index d9873b47..6166e1b7 100644 --- a/skills/humanize-rlcr/SKILL.md +++ b/skills/humanize-rlcr/SKILL.md @@ -83,7 +83,7 @@ Pass these through `setup-rlcr-loop.sh`: | `--plan-file <path>` | Explicit plan path | - | | `--track-plan-file` | Enforce tracked plan immutability | false | | `--max N` | Maximum iterations | 42 | -| `--codex-model MODEL:EFFORT` | Codex model and effort for `codex exec` | gpt-5.4:high | +| `--codex-model MODEL:EFFORT` | Codex model and effort for `codex exec` | gpt-5.5:high | | `--codex-timeout SECONDS` | Codex timeout | 5400 | | `--base-branch BRANCH` | Base for review phase | auto-detect | | `--full-review-round N` | Full alignment interval | 5 | @@ -94,7 +94,7 @@ Pass these through `setup-rlcr-loop.sh`: | `--yolo` | Skip quiz and enable --claude-answer-codex | false | | `--skip-quiz` | Skip Plan Understanding Quiz (implicit in skill mode) | false | -Review phase `codex review` runs with `gpt-5.4:high`. +Review phase `codex review` runs with `gpt-5.5:high`. ## Usage diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index 5b0a6af3..558e7e1d 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -72,8 +72,8 @@ After each round, write the required summary and stop/exit normally. 
Humanize's **Common Options:** - `--max N` - Maximum iterations before auto-stop (default: 42) -- `--codex-model MODEL:EFFORT` - Codex model and reasoning effort for `codex exec` (default: gpt-5.4:high) -- Review phase `codex review` uses `gpt-5.4:high` +- `--codex-model MODEL:EFFORT` - Codex model and reasoning effort for `codex exec` (default: gpt-5.5:high) +- Review phase `codex review` uses `gpt-5.5:high` - `--codex-timeout SECONDS` - Timeout for each Codex review (default: 5400) - `--base-branch BRANCH` - Base branch for code review (auto-detects if not specified) - `--full-review-round N` - Interval for full alignment checks (default: 5) diff --git a/tests/robustness/test-state-file-robustness.sh b/tests/robustness/test-state-file-robustness.sh index 5b694aaa..87cdfe41 100755 --- a/tests/robustness/test-state-file-robustness.sh +++ b/tests/robustness/test-state-file-robustness.sh @@ -36,7 +36,7 @@ cat > "$TEST_DIR/state.md" << 'EOF' --- current_round: 5 max_iterations: 10 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false @@ -79,7 +79,7 @@ max_iterations: 20 extra_field: some_value another_extra: 12345 custom_metadata: true -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 --- @@ -404,7 +404,7 @@ cat > "$TEST_DIR/state-full-review.md" << 'EOF' current_round: 3 max_iterations: 20 full_review_round: 7 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high plan_file: plan.md plan_tracked: false @@ -429,7 +429,7 @@ cat > "$TEST_DIR/state-no-full-review.md" << 'EOF' --- current_round: 2 max_iterations: 15 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high plan_file: plan.md plan_tracked: false @@ -455,7 +455,7 @@ cat > "$TEST_DIR/state-min-full-review.md" << 'EOF' current_round: 1 max_iterations: 10 full_review_round: 2 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high plan_file: plan.md plan_tracked: false diff --git 
a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index a62cf5b5..ef67073d 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -474,7 +474,7 @@ GI_EOF --- current_round: $round max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index bd23ab45..68ecfa13 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -393,7 +393,7 @@ echo "" setup_test_dir create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" -printf '{"bitlesson_model": "haiku", "codex_model": "gpt-5.4", "provider_mode": "codex-only"}' > "$TEST_DIR/.humanize/config.json" +printf '{"bitlesson_model": "haiku", "codex_model": "gpt-5.5", "provider_mode": "codex-only"}' > "$TEST_DIR/.humanize/config.json" FALLBACK_BIN="$TEST_DIR/fallback-bin" create_mock_codex "$FALLBACK_BIN" @@ -419,7 +419,7 @@ echo "" setup_test_dir create_mock_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" -printf '{"bitlesson_model": "gpt-5.4"}' > "$TEST_DIR/.humanize/config.json" +printf '{"bitlesson_model": "gpt-5.5"}' > "$TEST_DIR/.humanize/config.json" exit_code=0 stdout_out="" @@ -443,7 +443,7 @@ echo "" setup_test_dir create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" -printf '{"bitlesson_model": "gpt-5.4"}' > "$TEST_DIR/.humanize/config.json" +printf '{"bitlesson_model": "gpt-5.5"}' > "$TEST_DIR/.humanize/config.json" CAPTURE_BIN="$TEST_DIR/capture-bin" mkdir -p "$CAPTURE_BIN" cat > "$CAPTURE_BIN/codex" <<'EOF' diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh index 2d70bb2d..da20fb96 100755 --- a/tests/test-codex-hook-install.sh +++ b/tests/test-codex-hook-install.sh @@ -151,11 +151,11 @@ else fail "Codex install writes a PATH-ready bitlesson-selector shim" "$COMMAND_BIN_DIR/bitlesson-selector exists" "missing" fi -if [[ "$(jq -r 
'.bitlesson_model // empty' "$HUMANIZE_USER_CONFIG")" == "gpt-5.4" ]]; then +if [[ "$(jq -r '.bitlesson_model // empty' "$HUMANIZE_USER_CONFIG")" == "gpt-5.5" ]]; then pass "Codex install seeds bitlesson_model with a Codex/OpenAI model" else fail "Codex install seeds bitlesson_model with a Codex/OpenAI model" \ - "gpt-5.4" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" + "gpt-5.5" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" fi if [[ "$(jq -r '.provider_mode // empty' "$HUMANIZE_USER_CONFIG")" == "codex-only" ]]; then diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh index 7178052f..c240ad65 100755 --- a/tests/test-disable-nested-codex-hooks.sh +++ b/tests/test-disable-nested-codex-hooks.sh @@ -130,7 +130,7 @@ start_branch: $current_branch base_branch: $current_branch base_commit: $base_commit push_every_round: false -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 120 review_started: $review_started diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 75949696..03a3e408 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -219,7 +219,7 @@ setup_loop_dir() { --- current_round: $round max_iterations: $max_iter -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false diff --git a/tests/test-plan-file-validation.sh b/tests/test-plan-file-validation.sh index dabb9395..a6b9cc40 100755 --- a/tests/test-plan-file-validation.sh +++ b/tests/test-plan-file-validation.sh @@ -717,7 +717,7 @@ fi echo "Test 13: Reject codex effort with YAML-unsafe characters" rm -rf "$TEST_DIR/.humanize/rlcr" 2>/dev/null || true set +e -RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.4:high#comment" "plans/test-plan.md" 2>&1) +RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.5:high#comment" "plans/test-plan.md" 2>&1) EXIT_CODE=$? 
set -e if [[ $EXIT_CODE -ne 0 ]] && echo "$RESULT" | grep -q "Invalid codex effort"; then @@ -729,7 +729,7 @@ fi # Test 14: Accept valid codex model with dots and hyphens echo "Test 14: Accept valid codex model (alphanumeric, dots, hyphens)" set +e -RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.4:medium" "plans/test-plan.md" 2>&1) +RESULT=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" --codex-model "gpt-5.5:medium" "plans/test-plan.md" 2>&1) EXIT_CODE=$? set -e # Should not fail due to model/effort validation (may fail later for other reasons) diff --git a/tests/test-session-id.sh b/tests/test-session-id.sh index 4bd50577..33de5ba1 100755 --- a/tests/test-session-id.sh +++ b/tests/test-session-id.sh @@ -120,7 +120,7 @@ cat > "$TEST_DIR/project/.humanize/rlcr/2026-01-01_00-00-00/state.md" << 'EOF' --- current_round: 0 max_iterations: 10 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false diff --git a/tests/test-skill-monitor.sh b/tests/test-skill-monitor.sh index fca46b7e..4dcf708b 100755 --- a/tests/test-skill-monitor.sh +++ b/tests/test-skill-monitor.sh @@ -58,7 +58,7 @@ setup_test_env() { create_skill_invocation() { local unique_id="$1" local status="$2" - local model="${3:-gpt-5.4}" + local model="${3:-gpt-5.5}" local effort="${4:-high}" local duration="${5:-15s}" local question="${6:-How should I structure this?}" @@ -140,7 +140,7 @@ echo "=== Skill Monitor: Single Invocation ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-02-35-12345-abc123" "success" "gpt-5.4" "high" "15s" "How should I structure the auth module?" +create_skill_invocation "2026-02-19_21-02-35-12345-abc123" "success" "gpt-5.5" "high" "15s" "How should I structure the auth module?" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? 
if [[ $rc -eq 0 ]]; then @@ -167,7 +167,7 @@ else fail "Should show success status" "got: $output" fi -if grep -q "gpt-5.4" <<< "$output"; then +if grep -q "gpt-5.5" <<< "$output"; then pass "Shows model name" else fail "Should show model" "got: $output" @@ -199,10 +199,10 @@ echo "=== Skill Monitor: Multiple Invocations ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "First question" -create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.4" "high" "5s" "Second question" -create_skill_invocation "2026-02-19_21-00-00-333-ccc" "timeout" "gpt-5.4" "high" "3600s" "Third question" -create_skill_invocation "2026-02-19_21-30-00-444-ddd" "success" "gpt-5.4" "high" "20s" "Latest question" +create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "First question" +create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.5" "high" "5s" "Second question" +create_skill_invocation "2026-02-19_21-00-00-333-ccc" "timeout" "gpt-5.5" "high" "3600s" "Third question" +create_skill_invocation "2026-02-19_21-30-00-444-ddd" "success" "gpt-5.5" "high" "20s" "Latest question" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? if grep -q "Total Invocations: 4" <<< "$output"; then @@ -250,8 +250,8 @@ echo "=== Skill Monitor: Running Invocation ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "Completed question" -create_skill_invocation "2026-02-19_21-30-00-222-bbb" "running" "gpt-5.4" "high" "" "Running question" +create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "Completed question" +create_skill_invocation "2026-02-19_21-30-00-222-bbb" "running" "gpt-5.5" "high" "" "Running question" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? 
if grep -q "Running: 1" <<< "$output"; then @@ -274,9 +274,9 @@ echo "=== Skill Monitor: Recent Invocations List ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "Question one" -create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.4" "high" "5s" "Question two" -create_skill_invocation "2026-02-19_21-00-00-333-ccc" "success" "gpt-5.4" "high" "20s" "Question three" +create_skill_invocation "2026-02-19_20-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "Question one" +create_skill_invocation "2026-02-19_20-30-00-222-bbb" "error" "gpt-5.5" "high" "5s" "Question two" +create_skill_invocation "2026-02-19_21-00-00-333-ccc" "success" "gpt-5.5" "high" "20s" "Question three" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? if grep -q "Recent Invocations" <<< "$output"; then @@ -314,13 +314,13 @@ Additional context about the question. ## Configuration -- Model: gpt-5.4 +- Model: gpt-5.5 - Effort: high - Timeout: 3600s EOF cat > "$local_dir/metadata.md" << 'EOF' --- -model: gpt-5.4 +model: gpt-5.5 effort: high timeout: 3600 exit_code: 0 @@ -353,7 +353,7 @@ echo "=== Skill Monitor: Empty Response ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-00-00-111-aaa" "empty_response" "gpt-5.4" "high" "30s" "Why is the sky blue?" +create_skill_invocation "2026-02-19_21-00-00-111-aaa" "empty_response" "gpt-5.5" "high" "30s" "Why is the sky blue?" output=$(_humanize_monitor_skill --once 2>&1) && rc=0 || rc=$? 
if grep -q "Empty: 1" <<< "$output"; then @@ -376,7 +376,7 @@ echo "=== Skill Monitor: Non-skill Dir Filtering ===" setup_test_env mkdir -p .humanize/skill -create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.4" "high" "10s" "Real question" +create_skill_invocation "2026-02-19_21-00-00-111-aaa" "success" "gpt-5.5" "high" "10s" "Real question" # Create a non-matching directory mkdir -p ".humanize/skill/not-a-skill-dir" echo "junk" > ".humanize/skill/not-a-skill-dir/input.md" diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 08b037b3..8c3b6e25 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -40,7 +40,7 @@ PLANEOF --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false diff --git a/tests/test-stop-hook-bg-allow.sh b/tests/test-stop-hook-bg-allow.sh index 2f034320..9fdfc0f7 100755 --- a/tests/test-stop-hook-bg-allow.sh +++ b/tests/test-stop-hook-bg-allow.sh @@ -132,7 +132,7 @@ EOF --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -624,7 +624,7 @@ cat > "$AC11_STATE" <<EOF_AC11 --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -680,7 +680,7 @@ cat > "$AC11B_STATE" <<EOF_AC11B --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -754,7 +754,7 @@ cat > "$AC12_BASE/2026-03-02_00-00-00/state.md" <<'EOF_AC12_NEWER' --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high session_id: session_foreign --- @@ -765,7 +765,7 @@ cat > "$AC12_BASE/2026-03-01_00-00-00/state.md" <<'EOF_AC12_OLDER' --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high 
session_id: session_home --- @@ -804,7 +804,7 @@ cat > "$AC13_STATE" <<EOF_AC13 --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -860,7 +860,7 @@ cat > "$AC14_STATE" <<EOF_AC14 --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -968,7 +968,7 @@ cat > "$AC17_STATE" <<EOF_AC17 --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -1018,7 +1018,7 @@ cat > "$AC17C_STATE" <<EOF_AC17C --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -1062,7 +1062,7 @@ cat > "$AC18_BASE/2026-03-02_00-00-00/state.md" <<'EOF_AC18' --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high session_id: session_foreign --- @@ -1112,7 +1112,7 @@ cat > "$AC19_STATE" <<EOF_AC19 --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -1167,7 +1167,7 @@ cat > "$AC20_STATE" <<EOF_AC20 --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false @@ -1365,7 +1365,7 @@ cat > "$AC22B_STATE" <<EOF_AC22B --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 push_every_round: false diff --git a/tests/test-stop-hook-legacy-compat.sh b/tests/test-stop-hook-legacy-compat.sh index 7fa1f449..638de91e 100755 --- a/tests/test-stop-hook-legacy-compat.sh +++ b/tests/test-stop-hook-legacy-compat.sh @@ -68,7 +68,7 @@ EOF --- current_round: 0 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 60 
push_every_round: false diff --git a/tests/test-unified-codex-config.sh b/tests/test-unified-codex-config.sh index 5948193f..51e1e9b6 100755 --- a/tests/test-unified-codex-config.sh +++ b/tests/test-unified-codex-config.sh @@ -65,8 +65,8 @@ DEFAULT_CONFIG="$PROJECT_ROOT/config/default_config.json" if ! command -v jq >/dev/null 2>&1; then skip "default config tests require jq" "jq not found" else - assert_eq "default_config.json: codex_model is gpt-5.4" \ - "gpt-5.4" "$(jq -r '.codex_model' "$DEFAULT_CONFIG")" + assert_eq "default_config.json: codex_model is gpt-5.5" \ + "gpt-5.5" "$(jq -r '.codex_model' "$DEFAULT_CONFIG")" assert_eq "default_config.json: codex_effort is high" \ "high" "$(jq -r '.codex_effort' "$DEFAULT_CONFIG")" @@ -100,8 +100,8 @@ else merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) - assert_eq "default-only: codex_model defaults to gpt-5.4" \ - "gpt-5.4" "$(get_config_value "$merged" "codex_model")" + assert_eq "default-only: codex_model defaults to gpt-5.5" \ + "gpt-5.5" "$(get_config_value "$merged" "codex_model")" assert_eq "default-only: codex_effort defaults to high" \ "high" "$(get_config_value "$merged" "codex_effort")" @@ -141,7 +141,7 @@ else " 2>/dev/null || echo "ERROR") assert_eq "loop-common.sh: DEFAULT_CODEX_MODEL is set" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" + "gpt-5.5" "$(echo "$result" | cut -d'|' -f1)" assert_eq "loop-common.sh: DEFAULT_CODEX_EFFORT is set" \ "high" "$(echo "$result" | cut -d'|' -f2)" @@ -208,8 +208,8 @@ else result_line="$(printf '%s\n' "$result" | grep '^RESULT:' | tail -n 1)" - assert_eq "invalid config: codex_model falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" + assert_eq "invalid config: codex_model falls back to gpt-5.5" \ + "gpt-5.5" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" assert_eq "invalid config: codex_effort falls back to high" \ "high" "$(echo "$result_line" 
| cut -d'|' -f2)" @@ -236,8 +236,8 @@ else result_line="$(printf '%s\n' "$result" | grep '^RESULT:' | tail -n 1)" - assert_eq "non-Codex config ($invalid_model): codex_model falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" + assert_eq "non-Codex config ($invalid_model): codex_model falls back to gpt-5.5" \ + "gpt-5.5" "$(echo "$result_line" | cut -d':' -f2 | cut -d'|' -f1)" assert_eq "non-Codex config ($invalid_model): codex_effort stays at high fallback" \ "high" "$(echo "$result_line" | cut -d'|' -f2)" @@ -324,8 +324,8 @@ BARE_EOF echo \"\$EXEC_MODEL|\$EXEC_EFFORT\" " 2>/dev/null || echo "ERROR") - assert_eq "bare state: falls back to DEFAULT_CODEX_MODEL (gpt-5.4)" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" + assert_eq "bare state: falls back to DEFAULT_CODEX_MODEL (gpt-5.5)" \ + "gpt-5.5" "$(echo "$result" | cut -d'|' -f1)" assert_eq "bare state: falls back to DEFAULT_CODEX_EFFORT (high)" \ "high" "$(echo "$result" | cut -d'|' -f2)" @@ -422,7 +422,7 @@ else --- current_round: 1 max_iterations: 42 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: high codex_timeout: 5400 push_every_round: false @@ -460,8 +460,8 @@ STALE_EOF echo \"\$STATE_CODEX_MODEL|\$STATE_CODEX_EFFORT\" " 2>/dev/null || echo "ERROR") - assert_eq "stale state: STATE_CODEX_MODEL still parsed (gpt-5.4)" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" + assert_eq "stale state: STATE_CODEX_MODEL still parsed (gpt-5.5)" \ + "gpt-5.5" "$(echo "$result" | cut -d'|' -f1)" assert_eq "stale state: STATE_CODEX_EFFORT still parsed (high)" \ "high" "$(echo "$result" | cut -d'|' -f2)" @@ -491,7 +491,7 @@ else --- current_round: 1 max_iterations: 10 -codex_model: gpt-5.4 +codex_model: gpt-5.5 codex_effort: superhigh codex_timeout: 3600 push_every_round: false @@ -640,14 +640,14 @@ echo "" echo "--- Input validation ---" # Test invalid model name (has spaces) - test the validation regex directly -model_with_spaces="gpt 5.4 bad" 
+model_with_spaces="gpt 5.5 bad" if [[ ! "$model_with_spaces" =~ ^[a-zA-Z0-9._-]+$ ]]; then pass "validation: model with spaces is rejected by regex" else fail "validation: model with spaces is rejected by regex" fi -model_with_shell="gpt-5.4;rm-rf" +model_with_shell="gpt-5.5;rm-rf" if [[ ! "$model_with_shell" =~ ^[a-zA-Z0-9._-]+$ ]]; then pass "validation: model with shell metacharacters is rejected" else