-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathbatch-native-analysis.sh
More file actions
executable file
·225 lines (193 loc) · 7.57 KB
/
batch-native-analysis.sh
File metadata and controls
executable file
·225 lines (193 loc) · 7.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/usr/bin/env bash
# batch-native-analysis.sh — Analyze all unanalyzed sessions via `claude -p`.
#
# Uses `code-insights insights <id> --native --force` for each session.
# Stops immediately on rate limit (429) errors to avoid wasting API calls.
# Resume-safe: re-run anytime — already-analyzed sessions are skipped.
#
# Usage:
# ./batch-native-analysis.sh # Run all unanalyzed sessions
# ./batch-native-analysis.sh --dry-run # List sessions without running
# ./batch-native-analysis.sh --delay 10 # Custom delay between calls (default: 5s)
# ./batch-native-analysis.sh --min-msgs 5 # Min message count filter (default: 3)
# ./batch-native-analysis.sh --retry-failed # Re-run only previously failed sessions
# ./batch-native-analysis.sh --model opus # Use a specific model (default: sonnet)
set -euo pipefail
# NOTE: the 14-line header above is printed verbatim by --help
# (`head -14 "$0" | tail -9`), so its line positions must not change.
#
# Configuration defaults — the flags parsed below may override these.
DB_PATH="${CODE_INSIGHTS_DB:-$HOME/.code-insights/data.db}"  # SQLite DB; override via CODE_INSIGHTS_DB
FAILED_LOG="batch-native-failures.log"  # one session ID per line; consumed by --retry-failed
DRY_RUN=false       # --dry-run: list sessions, make no API calls
DELAY_BETWEEN=5     # --delay N: seconds to sleep between API calls
MIN_MESSAGES=3      # --min-msgs N: skip sessions with fewer messages
RETRY_FAILED=false  # --retry-failed: source IDs from $FAILED_LOG instead of the DB
MODEL="sonnet"      # --model NAME: forwarded to `code-insights insights --model`
# ── Flag parsing ──────────────────────────────────────────────────────────────
# Value-taking flags validate their argument up front: under `set -u` a bare
# `--delay` would otherwise die with an opaque "unbound variable" error (and
# `shift 2` would abort under `set -e`). --delay and --min-msgs must also be
# numeric — MIN_MESSAGES is interpolated into a SQL query later, so rejecting
# non-digits here closes that injection vector.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run) DRY_RUN=true; shift ;;
    --delay)
      [[ $# -ge 2 && "$2" =~ ^[0-9]+$ ]] || { echo "Error: --delay requires a numeric value"; exit 1; }
      DELAY_BETWEEN="$2"; shift 2 ;;
    --min-msgs)
      [[ $# -ge 2 && "$2" =~ ^[0-9]+$ ]] || { echo "Error: --min-msgs requires a numeric value"; exit 1; }
      MIN_MESSAGES="$2"; shift 2 ;;
    --retry-failed) RETRY_FAILED=true; shift ;;
    --model)
      [[ $# -ge 2 ]] || { echo "Error: --model requires a value"; exit 1; }
      MODEL="$2"; shift 2 ;;
    --help|-h)
      # Print the usage block embedded in the file header (lines 6-14).
      head -14 "$0" | tail -9
      exit 0
      ;;
    *) echo "Unknown flag: $1"; exit 1 ;;
  esac
done
# ── Prerequisites ─────────────────────────────────────────────────────────────
# Every external dependency is verified before any work starts; each failure
# prints an actionable hint and aborts.
command -v sqlite3 >/dev/null 2>&1 || {
  echo "Error: sqlite3 not found"
  exit 1
}
command -v claude >/dev/null 2>&1 || {
  echo "Error: claude CLI not found in PATH"
  echo "Install from: https://claude.ai/download"
  exit 1
}
command -v code-insights >/dev/null 2>&1 || {
  echo "Error: code-insights CLI not found in PATH"
  echo "Run: cd cli && pnpm build && npm link"
  exit 1
}
[[ -f "$DB_PATH" ]] || {
  echo "Error: Database not found at $DB_PATH"
  exit 1
}
# ── Build session list ────────────────────────────────────────────────────────
if [ "$RETRY_FAILED" = true ] && [ -f "$FAILED_LOG" ]; then
  # Re-run only previously failed sessions. sort -u reads the log directly
  # (no useless `cat |` pipeline) and dedups IDs that were appended twice.
  SESSION_IDS=$(sort -u "$FAILED_LOG")
  SOURCE="failed log ($FAILED_LOG)"
else
  # All sessions missing an analysis_usage row (never analyzed) with enough
  # messages, largest first. NOTE(review): MIN_MESSAGES is interpolated into
  # the SQL text — it is expected to be numeric (default or flag value);
  # confirm the flag parser enforces this.
  SESSION_IDS=$(sqlite3 "$DB_PATH" "
    SELECT s.id
    FROM sessions s
    LEFT JOIN analysis_usage au ON au.session_id = s.id AND au.analysis_type = 'session'
    WHERE s.deleted_at IS NULL
      AND s.message_count >= $MIN_MESSAGES
      AND au.session_id IS NULL
    ORDER BY s.message_count DESC;
  ")
  SOURCE="unanalyzed (min ${MIN_MESSAGES} messages)"
fi
if [ -z "$SESSION_IDS" ]; then
  echo "No sessions to analyze."
  exit 0
fi
# ── Run banner ────────────────────────────────────────────────────────────────
# SESSION_IDS is newline-delimited, so line count == session count.
TOTAL=$(printf '%s\n' "$SESSION_IDS" | wc -l | tr -d ' ')
printf '%s\n' \
  "============================================" \
  " Code Insights — Batch Native Analysis" \
  "============================================" \
  " Source: $SOURCE" \
  " Sessions: $TOTAL" \
  " Model: $MODEL" \
  " Delay: ${DELAY_BETWEEN}s between calls" \
  " Min messages: $MIN_MESSAGES" \
  " Dry run: $DRY_RUN" \
  " DB: $DB_PATH" \
  "============================================" \
  ""
# ── Dry run: list the selected sessions, make no analysis calls ──────────────
if [[ "$DRY_RUN" == true ]]; then
  echo "Sessions to analyze:"
  i=0
  while IFS= read -r sid; do
    # Skip blank lines so the count matches the word-splitting behavior
    # a plain `for` loop over $SESSION_IDS would have.
    [[ -n "$sid" ]] || continue
    i=$((i + 1))
    info=$(sqlite3 "$DB_PATH" "SELECT message_count || ' msgs | ' || project_name FROM sessions WHERE id = '$sid';")
    printf ' [%s/%s] %s %s\n' "$i" "$TOTAL" "$info" "$sid"
  done <<<"$SESSION_IDS"
  printf '\n%s\n' "(dry run — no analysis calls made)"
  exit 0
fi
# ── Run analysis ──────────────────────────────────────────────────────────────
# Clear the failed log for this run. It is only ever appended to below, and
# --retry-failed reads it *before* this point, so truncating here is safe.
> "$FAILED_LOG"
START_TIME=$(date +%s)
SUCCESS=0
FAILED=0
IDX=0
for SESSION_ID in $SESSION_IDS; do
  IDX=$((IDX + 1))
  INFO=$(sqlite3 "$DB_PATH" "
    SELECT message_count || ' msgs | ' || project_name
    FROM sessions WHERE id = '$SESSION_ID';
  ")
  # Fixed format string (ShellCheck SC2059): data goes through '%s' so a
  # project_name containing '%' is not interpreted as a printf directive.
  printf '%s' "[$IDX/$TOTAL] $INFO ... "
  SESSION_START=$(date +%s)
  # Run analysis — capture stdout+stderr together. `|| EXIT_CODE=$?` keeps
  # `set -e` from aborting on failure so the error can be classified below.
  OUTPUT=""
  EXIT_CODE=0
  OUTPUT=$(code-insights insights "$SESSION_ID" --native --force --model "$MODEL" 2>&1) || EXIT_CODE=$?
  ELAPSED=$(( $(date +%s) - SESSION_START ))
  if [ "$EXIT_CODE" -eq 0 ]; then
    SUCCESS=$((SUCCESS + 1))
    echo "done (${ELAPSED}s)"
  else
    # ── Rate limit / overloaded detection ──────────────────────────────
    # Check for 429, rate limit, overloaded, or capacity errors in output.
    # These mean we should STOP — not retry — to avoid hammering the API.
    if echo "$OUTPUT" | grep -qiE '429|rate.?limit|overloaded|too many requests|capacity|throttl'; then
      echo "RATE LIMITED"
      echo ""
      echo "============================================"
      echo " STOPPED — Rate limit or capacity error"
      echo "============================================"
      echo " Error output:"
      echo " $OUTPUT" | head -5
      echo ""
      echo " $SUCCESS sessions completed before hitting the limit."
      echo " Remaining: $((TOTAL - IDX)) sessions not attempted."
      echo ""
      echo " To resume, wait a few minutes then re-run:"
      echo " ./batch-native-analysis.sh"
      echo "============================================"
      # Log this session as failed so --retry-failed picks it up
      echo "$SESSION_ID" >> "$FAILED_LOG"
      # Also log remaining un-attempted sessions so --retry-failed
      # covers everything that was never started.
      REMAINING_IDS=$(echo "$SESSION_IDS" | tail -n +$((IDX + 1)))
      if [ -n "$REMAINING_IDS" ]; then
        echo "$REMAINING_IDS" >> "$FAILED_LOG"
      fi
      # Print summary and exit with code 2 so callers can distinguish
      # "rate limited" from an ordinary failure (exit 1).
      END_TIME=$(date +%s)
      TOTAL_ELAPSED=$(( END_TIME - START_TIME ))
      echo ""
      echo " Completed: $SUCCESS"
      echo " Failed: $((FAILED + 1))"
      echo " Not started: $((TOTAL - IDX))"
      echo " Elapsed: $((TOTAL_ELAPSED / 60))m $((TOTAL_ELAPSED % 60))s"
      echo " Failed log: $FAILED_LOG ($((FAILED + 1 + TOTAL - IDX)) session IDs)"
      exit 2
    fi
    # Non-rate-limit error — log the ID and continue with the next session.
    FAILED=$((FAILED + 1))
    echo "FAILED (${ELAPSED}s)"
    echo " Error: $(echo "$OUTPUT" | head -3)"
    echo "$SESSION_ID" >> "$FAILED_LOG"
  fi
  # Delay between calls (skip after last session)
  if [ "$IDX" -lt "$TOTAL" ]; then
    sleep "$DELAY_BETWEEN"
  fi
done
# ── Summary ───────────────────────────────────────────────────────────────────
# Final report: totals, wall-clock time, and retry instructions on failure.
END_TIME=$(date +%s)
TOTAL_ELAPSED=$((END_TIME - START_TIME))
printf '%s\n' \
  "" \
  "============================================" \
  " Summary" \
  "============================================" \
  " Total: $TOTAL" \
  " Model: $MODEL" \
  " Success: $SUCCESS" \
  " Failed: $FAILED" \
  " Elapsed: $((TOTAL_ELAPSED / 60))m $((TOTAL_ELAPSED % 60))s"
if [ "$FAILED" -gt 0 ]; then
  printf '%s\n' \
    " Failed log: $FAILED_LOG ($FAILED session IDs)" \
    "" \
    " To retry failed sessions:" \
    " ./batch-native-analysis.sh --retry-failed"
fi
echo "============================================"