Skip to content

Commit 93cbad2

Browse files
authored
fix(core): preserve reasoning_content during session resume and active sessions (GH#3579) (#3590)
* fix(core): preserve reasoning_content during session resume and active sessions (GH#3579)
* chore(core): remove dead thinkingThresholdMinutes config after latch removal (GH#3579)
1 parent e47b228 commit 93cbad2

9 files changed

Lines changed: 126 additions & 126 deletions

File tree

docs/users/configuration/settings.md

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,6 @@ The `extra_body` field allows you to add custom parameters to the request body s
223223
| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` |
224224
| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` |
225225
| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` |
226-
| `context.clearContextOnIdle.thinkingThresholdMinutes` | number | Minutes of inactivity before clearing old thinking blocks to free context tokens. Aligns with typical provider prompt-cache TTL. Use `-1` to disable. | `5` |
227226
| `context.clearContextOnIdle.toolResultsThresholdMinutes` | number | Minutes of inactivity before clearing old tool result content. Use `-1` to disable. | `60` |
228227
| `context.clearContextOnIdle.toolResultsNumToKeep` | number | Number of most-recent compactable tool results to preserve when clearing. Floor at 1. | `5` |
229228

packages/cli/src/config/settingsSchema.ts

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1012,16 +1012,6 @@ const SETTINGS_SCHEMA = {
10121012
'Settings for clearing stale context after idle periods. Use -1 to disable a threshold.',
10131013
showInDialog: false,
10141014
properties: {
1015-
thinkingThresholdMinutes: {
1016-
type: 'number',
1017-
label: 'Thinking Idle Threshold (minutes)',
1018-
category: 'Context',
1019-
requiresRestart: false,
1020-
default: 5 as number,
1021-
description:
1022-
'Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.',
1023-
showInDialog: false,
1024-
},
10251015
toolResultsThresholdMinutes: {
10261016
type: 'number',
10271017
label: 'Tool Results Idle Threshold (minutes)',

packages/core/src/config/config.ts

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -203,8 +203,6 @@ export interface ChatCompressionSettings {
203203
* Threshold values of -1 mean "never clear" (disabled).
204204
*/
205205
export interface ClearContextOnIdleSettings {
206-
/** Minutes idle before clearing old thinking blocks. Default 5. Use -1 to disable. */
207-
thinkingThresholdMinutes?: number;
208206
/** Minutes idle before clearing old tool results. Default 60. Use -1 to disable. */
209207
toolResultsThresholdMinutes?: number;
210208
/** Number of most-recent tool results to preserve. Default 5. */
@@ -766,8 +764,6 @@ export class Config {
766764
this.bugCommand = params.bugCommand;
767765
this.maxSessionTurns = params.maxSessionTurns ?? -1;
768766
this.clearContextOnIdle = {
769-
thinkingThresholdMinutes:
770-
params.clearContextOnIdle?.thinkingThresholdMinutes ?? 5,
771767
toolResultsThresholdMinutes:
772768
params.clearContextOnIdle?.toolResultsThresholdMinutes ?? 60,
773769
toolResultsNumToKeep:

packages/core/src/core/client.test.ts

Lines changed: 1 addition & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -347,7 +347,6 @@ describe('Gemini Client (client.ts)', () => {
347347
getFileService: vi.fn().mockReturnValue(fileService),
348348
getMaxSessionTurns: vi.fn().mockReturnValue(0),
349349
getClearContextOnIdle: vi.fn().mockReturnValue({
350-
thinkingThresholdMinutes: 5,
351350
toolResultsThresholdMinutes: 60,
352351
toolResultsNumToKeep: 5,
353352
}),
@@ -473,73 +472,10 @@ describe('Gemini Client (client.ts)', () => {
473472
addHistory: vi.fn(),
474473
getHistory: vi.fn().mockReturnValue([]),
475474
stripThoughtsFromHistory: vi.fn(),
476-
stripThoughtsFromHistoryKeepRecent: vi.fn(),
477475
};
478476
client['chat'] = mockChat as GeminiChat;
479477
});
480478

481-
it('should not strip thoughts on active session (< 5min idle)', async () => {
482-
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
483-
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
484-
client['thinkingClearLatched'] = false;
485-
486-
const gen = client.sendMessageStream(
487-
[{ text: 'Hello' }],
488-
new AbortController().signal,
489-
'prompt-1',
490-
{ type: SendMessageType.UserQuery },
491-
);
492-
for await (const _ of gen) {
493-
/* drain */
494-
}
495-
496-
expect(
497-
mockChat.stripThoughtsFromHistoryKeepRecent,
498-
).not.toHaveBeenCalled();
499-
});
500-
501-
it('should latch and strip thoughts after > 5min idle', async () => {
502-
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
503-
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
504-
client['thinkingClearLatched'] = false;
505-
506-
const gen = client.sendMessageStream(
507-
[{ text: 'Hello' }],
508-
new AbortController().signal,
509-
'prompt-2',
510-
{ type: SendMessageType.UserQuery },
511-
);
512-
for await (const _ of gen) {
513-
/* drain */
514-
}
515-
516-
expect(client['thinkingClearLatched']).toBe(true);
517-
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
518-
1,
519-
);
520-
});
521-
522-
it('should keep stripping once latched even if idle < 5min', async () => {
523-
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
524-
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
525-
client['thinkingClearLatched'] = true;
526-
527-
const gen = client.sendMessageStream(
528-
[{ text: 'Hello' }],
529-
new AbortController().signal,
530-
'prompt-3',
531-
{ type: SendMessageType.UserQuery },
532-
);
533-
for await (const _ of gen) {
534-
/* drain */
535-
}
536-
537-
expect(client['thinkingClearLatched']).toBe(true);
538-
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
539-
1,
540-
);
541-
});
542-
543479
it('should update lastApiCompletionTimestamp after API call', async () => {
544480
client['lastApiCompletionTimestamp'] = null;
545481

@@ -559,13 +495,11 @@ describe('Gemini Client (client.ts)', () => {
559495
);
560496
});
561497

562-
it('should reset latch and timestamp on resetChat', async () => {
498+
it('should reset lastApiCompletionTimestamp on resetChat', async () => {
563499
client['lastApiCompletionTimestamp'] = Date.now();
564-
client['thinkingClearLatched'] = true;
565500

566501
await client.resetChat();
567502

568-
expect(client['thinkingClearLatched']).toBe(false);
569503
expect(client['lastApiCompletionTimestamp']).toBeNull();
570504
});
571505
});

packages/core/src/core/client.ts

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -156,17 +156,6 @@ export class GeminiClient {
156156
*/
157157
private lastApiCompletionTimestamp: number | null = null;
158158

159-
/**
160-
* Sticky-on latch for clearing thinking blocks from prior turns.
161-
* Triggered when idle exceeds the configured threshold (default 5 min,
162-
* aligned with provider prompt-cache TTL). Once latched, stays true to
163-
* prevent oscillation: without it, thinking would accumulate → get
164-
* stripped → accumulate again, causing the message prefix to change
165-
* repeatedly (bad for provider-side prompt caching and wastes context).
166-
* Reset on /clear (resetChat).
167-
*/
168-
private thinkingClearLatched = false;
169-
170159
constructor(private readonly config: Config) {
171160
this.loopDetector = new LoopDetectionService(config);
172161
}
@@ -242,8 +231,6 @@ export class GeminiClient {
242231

243232
async resetChat(): Promise<void> {
244233
this.surfacedRelevantAutoMemoryPaths.clear();
245-
// Reset thinking clear latch — fresh chat, no prior thinking to clean up
246-
this.thinkingClearLatched = false;
247234
this.lastApiCompletionTimestamp = null;
248235
await this.startChat();
249236
}
@@ -695,29 +682,6 @@ export class GeminiClient {
695682
this.config.getChatRecordingService()?.recordUserMessage(request);
696683
}
697684

698-
// Idle cleanup: clear stale thinking blocks after idle period.
699-
// Latch: once triggered, never revert — prevents oscillation.
700-
const idleConfig = this.config.getClearContextOnIdle();
701-
const thinkingThresholdMin = idleConfig.thinkingThresholdMinutes ?? 5;
702-
if (
703-
thinkingThresholdMin >= 0 &&
704-
!this.thinkingClearLatched &&
705-
this.lastApiCompletionTimestamp !== null
706-
) {
707-
const thresholdMs = thinkingThresholdMin * 60 * 1000;
708-
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
709-
if (idleMs > thresholdMs) {
710-
this.thinkingClearLatched = true;
711-
debugLogger.debug(
712-
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
713-
);
714-
}
715-
}
716-
if (this.thinkingClearLatched) {
717-
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
718-
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
719-
}
720-
721685
// Idle cleanup: clear old tool results when idle > threshold.
722686
// Runs on user and cron messages (not tool result submissions or
723687
// retries/hooks) so that model latency during a tool-call loop

packages/core/src/services/microcompaction/microcompact.test.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@ function makeModelMessage(text: string): Content {
4141
}
4242

4343
const DEFAULT_SETTINGS: ClearContextOnIdleSettings = {
44-
thinkingThresholdMinutes: 5,
4544
toolResultsThresholdMinutes: 5,
4645
toolResultsNumToKeep: 1,
4746
};

packages/core/src/services/sessionService.test.ts

Lines changed: 121 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -783,5 +783,126 @@ describe('SessionService', () => {
783783
postCompressionRecord.message,
784784
]);
785785
});
786+
787+
it('should preserve thought parts by default (stripThoughtsFromHistory=false)', () => {
788+
const modelWithThought: ChatRecord = {
789+
uuid: 't1',
790+
parentUuid: 'a1',
791+
sessionId: sessionIdA,
792+
timestamp: '2024-01-01T01:00:00Z',
793+
type: 'assistant',
794+
message: {
795+
role: 'model',
796+
parts: [
797+
{ text: 'reasoning step', thought: true },
798+
{ text: 'final answer' },
799+
],
800+
},
801+
cwd: '/test/project/root',
802+
version: '1.0.0',
803+
};
804+
805+
const conversation: ConversationRecord = {
806+
sessionId: sessionIdA,
807+
projectHash: 'test-project-hash',
808+
startTime: '2024-01-01T00:00:00Z',
809+
lastUpdated: '2024-01-01T01:00:00Z',
810+
messages: [recordA1, modelWithThought],
811+
};
812+
813+
const history = buildApiHistoryFromConversation(conversation);
814+
815+
// Thought parts should be preserved by default
816+
expect(history).toHaveLength(2);
817+
expect(history[1].parts).toEqual([
818+
{ text: 'reasoning step', thought: true },
819+
{ text: 'final answer' },
820+
]);
821+
});
822+
823+
it('should strip thought parts when stripThoughtsFromHistory=true', () => {
824+
const modelWithThought: ChatRecord = {
825+
uuid: 't1',
826+
parentUuid: 'a1',
827+
sessionId: sessionIdA,
828+
timestamp: '2024-01-01T01:00:00Z',
829+
type: 'assistant',
830+
message: {
831+
role: 'model',
832+
parts: [
833+
{ text: 'reasoning step', thought: true },
834+
{ text: 'final answer' },
835+
],
836+
},
837+
cwd: '/test/project/root',
838+
version: '1.0.0',
839+
};
840+
841+
const conversation: ConversationRecord = {
842+
sessionId: sessionIdA,
843+
projectHash: 'test-project-hash',
844+
startTime: '2024-01-01T00:00:00Z',
845+
lastUpdated: '2024-01-01T01:00:00Z',
846+
messages: [recordA1, modelWithThought],
847+
};
848+
849+
const history = buildApiHistoryFromConversation(conversation, {
850+
stripThoughtsFromHistory: true,
851+
});
852+
853+
// Thought parts should be stripped
854+
expect(history).toHaveLength(2);
855+
expect(history[1].parts).toEqual([{ text: 'final answer' }]);
856+
});
857+
858+
it('should preserve thought parts in compressed history by default', () => {
859+
const compressionRecord: ChatRecord = {
860+
uuid: 'c1',
861+
parentUuid: 'b2',
862+
sessionId: sessionIdA,
863+
timestamp: '2024-01-02T03:00:00Z',
864+
type: 'system',
865+
subtype: 'chat_compression',
866+
cwd: '/test/project/root',
867+
version: '1.0.0',
868+
gitBranch: 'main',
869+
systemPayload: {
870+
info: {
871+
originalTokenCount: 100,
872+
newTokenCount: 50,
873+
compressionStatus: CompressionStatus.COMPRESSED,
874+
},
875+
compressedHistory: [
876+
{ role: 'user', parts: [{ text: 'summary' }] },
877+
{
878+
role: 'model',
879+
parts: [
880+
{ text: 'deep thinking', thought: true },
881+
{ text: 'final answer' },
882+
],
883+
},
884+
],
885+
},
886+
};
887+
888+
const conversation: ConversationRecord = {
889+
sessionId: sessionIdA,
890+
projectHash: 'test-project-hash',
891+
startTime: '2024-01-01T00:00:00Z',
892+
lastUpdated: '2024-01-02T03:00:00Z',
893+
messages: [recordA1, recordB2, compressionRecord],
894+
};
895+
896+
const history = buildApiHistoryFromConversation(conversation);
897+
898+
// Thought parts should be preserved in compressed history by default.
899+
// The compressedHistory has 2 entries (user, model), and no messages
900+
// exist after the compression record, so the result is 2 items.
901+
expect(history).toHaveLength(2);
902+
expect(history[1].parts).toEqual([
903+
{ text: 'deep thinking', thought: true },
904+
{ text: 'final answer' },
905+
]);
906+
});
786907
});
787908
});

packages/core/src/services/sessionService.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -865,7 +865,9 @@ export interface BuildApiHistoryOptions {
865865
/**
866866
* Whether to strip thought parts from the history.
867867
* Thought parts are content parts that have `thought: true`.
868-
* @default true
868+
* Keeping thoughts ensures `reasoning_content` from reasoning models
869+
* (e.g. DeepSeek) is properly passed back in subsequent API calls.
870+
* @default false
869871
*/
870872
stripThoughtsFromHistory?: boolean;
871873
}
@@ -906,7 +908,7 @@ export function buildApiHistoryFromConversation(
906908
conversation: ConversationRecord,
907909
options: BuildApiHistoryOptions = {},
908910
): Content[] {
909-
const { stripThoughtsFromHistory = true } = options;
911+
const { stripThoughtsFromHistory = false } = options;
910912
const { messages } = conversation;
911913

912914
let lastCompressionIndex = -1;

packages/vscode-ide-companion/schemas/settings.schema.json

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -402,11 +402,6 @@
402402
"description": "Settings for clearing stale context after idle periods. Use -1 to disable a threshold.",
403403
"type": "object",
404404
"properties": {
405-
"thinkingThresholdMinutes": {
406-
"description": "Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.",
407-
"type": "number",
408-
"default": 5
409-
},
410405
"toolResultsThresholdMinutes": {
411406
"description": "Minutes of inactivity before clearing old tool result content. Use -1 to disable.",
412407
"type": "number",

0 commit comments

Comments
 (0)