fix(core): preserve reasoning_content during session resume and active sessions (GH#3579) (#3590)

fyc09 · web-flow · commit 93cbad24b1ec · 2026-04-24T17:49:05.000+08:00
* fix(core): preserve reasoning_content during session resume and active sessions (GH#3579)

* chore(core): remove dead thinkingThresholdMinutes config after latch removal (GH#3579)
diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
@@ -223,7 +223,6 @@ The `extra_body` field allows you to add custom parameters to the request body s
 | `context.fileFiltering.respectQwenIgnore`                | boolean                    | Respect .qwenignore files when searching.                                                                                                                                                                                                                                                                                                                             | `true`      |
 | `context.fileFiltering.enableRecursiveFileSearch`        | boolean                    | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt.                                                                                                                                                                                                                                              | `true`      |
 | `context.fileFiltering.enableFuzzySearch`                | boolean                    | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files.                                                                                                                                                                                                              | `true`      |
-| `context.clearContextOnIdle.thinkingThresholdMinutes`    | number                     | Minutes of inactivity before clearing old thinking blocks to free context tokens. Aligns with typical provider prompt-cache TTL. Use `-1` to disable.                                                                                                                                                                                                                 | `5`         |
 | `context.clearContextOnIdle.toolResultsThresholdMinutes` | number                     | Minutes of inactivity before clearing old tool result content. Use `-1` to disable.                                                                                                                                                                                                                                                                                   | `60`        |
 | `context.clearContextOnIdle.toolResultsNumToKeep`        | number                     | Number of most-recent compactable tool results to preserve when clearing. Floor at 1.                                                                                                                                                                                                                                                                                 | `5`         |
 
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
@@ -1012,16 +1012,6 @@ const SETTINGS_SCHEMA = {
           'Settings for clearing stale context after idle periods. Use -1 to disable a threshold.',
         showInDialog: false,
         properties: {
-          thinkingThresholdMinutes: {
-            type: 'number',
-            label: 'Thinking Idle Threshold (minutes)',
-            category: 'Context',
-            requiresRestart: false,
-            default: 5 as number,
-            description:
-              'Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.',
-            showInDialog: false,
-          },
           toolResultsThresholdMinutes: {
             type: 'number',
             label: 'Tool Results Idle Threshold (minutes)',
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
@@ -203,8 +203,6 @@ export interface ChatCompressionSettings {
  * Threshold values of -1 mean "never clear" (disabled).
  */
 export interface ClearContextOnIdleSettings {
-  /** Minutes idle before clearing old thinking blocks. Default 5. Use -1 to disable. */
-  thinkingThresholdMinutes?: number;
   /** Minutes idle before clearing old tool results. Default 60. Use -1 to disable. */
   toolResultsThresholdMinutes?: number;
   /** Number of most-recent tool results to preserve. Default 5. */
@@ -766,8 +764,6 @@ export class Config {
     this.bugCommand = params.bugCommand;
     this.maxSessionTurns = params.maxSessionTurns ?? -1;
     this.clearContextOnIdle = {
-      thinkingThresholdMinutes:
-        params.clearContextOnIdle?.thinkingThresholdMinutes ?? 5,
       toolResultsThresholdMinutes:
         params.clearContextOnIdle?.toolResultsThresholdMinutes ?? 60,
       toolResultsNumToKeep:
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
@@ -347,7 +347,6 @@ describe('Gemini Client (client.ts)', () => {
       getFileService: vi.fn().mockReturnValue(fileService),
       getMaxSessionTurns: vi.fn().mockReturnValue(0),
       getClearContextOnIdle: vi.fn().mockReturnValue({
-        thinkingThresholdMinutes: 5,
         toolResultsThresholdMinutes: 60,
         toolResultsNumToKeep: 5,
       }),
@@ -473,73 +472,10 @@ describe('Gemini Client (client.ts)', () => {
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
-        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
     });
 
-    it('should not strip thoughts on active session (< 5min idle)', async () => {
-      // Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
-      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
-      client['thinkingClearLatched'] = false;
-
-      const gen = client.sendMessageStream(
-        [{ text: 'Hello' }],
-        new AbortController().signal,
-        'prompt-1',
-        { type: SendMessageType.UserQuery },
-      );
-      for await (const _ of gen) {
-        /* drain */
-      }
-
-      expect(
-        mockChat.stripThoughtsFromHistoryKeepRecent,
-      ).not.toHaveBeenCalled();
-    });
-
-    it('should latch and strip thoughts after > 5min idle', async () => {
-      // Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
-      client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
-      client['thinkingClearLatched'] = false;
-
-      const gen = client.sendMessageStream(
-        [{ text: 'Hello' }],
-        new AbortController().signal,
-        'prompt-2',
-        { type: SendMessageType.UserQuery },
-      );
-      for await (const _ of gen) {
-        /* drain */
-      }
-
-      expect(client['thinkingClearLatched']).toBe(true);
-      expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
-        1,
-      );
-    });
-
-    it('should keep stripping once latched even if idle < 5min', async () => {
-      // Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
-      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
-      client['thinkingClearLatched'] = true;
-
-      const gen = client.sendMessageStream(
-        [{ text: 'Hello' }],
-        new AbortController().signal,
-        'prompt-3',
-        { type: SendMessageType.UserQuery },
-      );
-      for await (const _ of gen) {
-        /* drain */
-      }
-
-      expect(client['thinkingClearLatched']).toBe(true);
-      expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
-        1,
-      );
-    });
-
     it('should update lastApiCompletionTimestamp after API call', async () => {
       client['lastApiCompletionTimestamp'] = null;
 
@@ -559,13 +495,11 @@ describe('Gemini Client (client.ts)', () => {
       );
     });
 
-    it('should reset latch and timestamp on resetChat', async () => {
+    it('should reset lastApiCompletionTimestamp on resetChat', async () => {
       client['lastApiCompletionTimestamp'] = Date.now();
-      client['thinkingClearLatched'] = true;
 
       await client.resetChat();
 
-      expect(client['thinkingClearLatched']).toBe(false);
       expect(client['lastApiCompletionTimestamp']).toBeNull();
     });
   });
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
@@ -156,17 +156,6 @@ export class GeminiClient {
    */
   private lastApiCompletionTimestamp: number | null = null;
 
-  /**
-   * Sticky-on latch for clearing thinking blocks from prior turns.
-   * Triggered when idle exceeds the configured threshold (default 5 min,
-   * aligned with provider prompt-cache TTL). Once latched, stays true to
-   * prevent oscillation: without it, thinking would accumulate → get
-   * stripped → accumulate again, causing the message prefix to change
-   * repeatedly (bad for provider-side prompt caching and wastes context).
-   * Reset on /clear (resetChat).
-   */
-  private thinkingClearLatched = false;
-
   constructor(private readonly config: Config) {
     this.loopDetector = new LoopDetectionService(config);
   }
@@ -242,8 +231,6 @@ export class GeminiClient {
 
   async resetChat(): Promise<void> {
     this.surfacedRelevantAutoMemoryPaths.clear();
-    // Reset thinking clear latch — fresh chat, no prior thinking to clean up
-    this.thinkingClearLatched = false;
     this.lastApiCompletionTimestamp = null;
     await this.startChat();
   }
@@ -695,29 +682,6 @@ export class GeminiClient {
         this.config.getChatRecordingService()?.recordUserMessage(request);
       }
 
-      // Idle cleanup: clear stale thinking blocks after idle period.
-      // Latch: once triggered, never revert — prevents oscillation.
-      const idleConfig = this.config.getClearContextOnIdle();
-      const thinkingThresholdMin = idleConfig.thinkingThresholdMinutes ?? 5;
-      if (
-        thinkingThresholdMin >= 0 &&
-        !this.thinkingClearLatched &&
-        this.lastApiCompletionTimestamp !== null
-      ) {
-        const thresholdMs = thinkingThresholdMin * 60 * 1000;
-        const idleMs = Date.now() - this.lastApiCompletionTimestamp;
-        if (idleMs > thresholdMs) {
-          this.thinkingClearLatched = true;
-          debugLogger.debug(
-            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
-          );
-        }
-      }
-      if (this.thinkingClearLatched) {
-        this.getChat().stripThoughtsFromHistoryKeepRecent(1);
-        debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
-      }
-
       // Idle cleanup: clear old tool results when idle > threshold.
       // Runs on user and cron messages (not tool result submissions or
       // retries/hooks) so that model latency during a tool-call loop
diff --git a/packages/core/src/services/microcompaction/microcompact.test.ts b/packages/core/src/services/microcompaction/microcompact.test.ts
@@ -41,7 +41,6 @@ function makeModelMessage(text: string): Content {
 }
 
 const DEFAULT_SETTINGS: ClearContextOnIdleSettings = {
-  thinkingThresholdMinutes: 5,
   toolResultsThresholdMinutes: 5,
   toolResultsNumToKeep: 1,
 };
diff --git a/packages/core/src/services/sessionService.test.ts b/packages/core/src/services/sessionService.test.ts
@@ -783,5 +783,126 @@ describe('SessionService', () => {
         postCompressionRecord.message,
       ]);
     });
+
+    it('should preserve thought parts by default (stripThoughtsFromHistory=false)', () => {
+      const modelWithThought: ChatRecord = {
+        uuid: 't1',
+        parentUuid: 'a1',
+        sessionId: sessionIdA,
+        timestamp: '2024-01-01T01:00:00Z',
+        type: 'assistant',
+        message: {
+          role: 'model',
+          parts: [
+            { text: 'reasoning step', thought: true },
+            { text: 'final answer' },
+          ],
+        },
+        cwd: '/test/project/root',
+        version: '1.0.0',
+      };
+
+      const conversation: ConversationRecord = {
+        sessionId: sessionIdA,
+        projectHash: 'test-project-hash',
+        startTime: '2024-01-01T00:00:00Z',
+        lastUpdated: '2024-01-01T01:00:00Z',
+        messages: [recordA1, modelWithThought],
+      };
+
+      const history = buildApiHistoryFromConversation(conversation);
+
+      // Thought parts should be preserved by default
+      expect(history).toHaveLength(2);
+      expect(history[1].parts).toEqual([
+        { text: 'reasoning step', thought: true },
+        { text: 'final answer' },
+      ]);
+    });
+
+    it('should strip thought parts when stripThoughtsFromHistory=true', () => {
+      const modelWithThought: ChatRecord = {
+        uuid: 't1',
+        parentUuid: 'a1',
+        sessionId: sessionIdA,
+        timestamp: '2024-01-01T01:00:00Z',
+        type: 'assistant',
+        message: {
+          role: 'model',
+          parts: [
+            { text: 'reasoning step', thought: true },
+            { text: 'final answer' },
+          ],
+        },
+        cwd: '/test/project/root',
+        version: '1.0.0',
+      };
+
+      const conversation: ConversationRecord = {
+        sessionId: sessionIdA,
+        projectHash: 'test-project-hash',
+        startTime: '2024-01-01T00:00:00Z',
+        lastUpdated: '2024-01-01T01:00:00Z',
+        messages: [recordA1, modelWithThought],
+      };
+
+      const history = buildApiHistoryFromConversation(conversation, {
+        stripThoughtsFromHistory: true,
+      });
+
+      // Thought parts should be stripped
+      expect(history).toHaveLength(2);
+      expect(history[1].parts).toEqual([{ text: 'final answer' }]);
+    });
+
+    it('should preserve thought parts in compressed history by default', () => {
+      const compressionRecord: ChatRecord = {
+        uuid: 'c1',
+        parentUuid: 'b2',
+        sessionId: sessionIdA,
+        timestamp: '2024-01-02T03:00:00Z',
+        type: 'system',
+        subtype: 'chat_compression',
+        cwd: '/test/project/root',
+        version: '1.0.0',
+        gitBranch: 'main',
+        systemPayload: {
+          info: {
+            originalTokenCount: 100,
+            newTokenCount: 50,
+            compressionStatus: CompressionStatus.COMPRESSED,
+          },
+          compressedHistory: [
+            { role: 'user', parts: [{ text: 'summary' }] },
+            {
+              role: 'model',
+              parts: [
+                { text: 'deep thinking', thought: true },
+                { text: 'final answer' },
+              ],
+            },
+          ],
+        },
+      };
+
+      const conversation: ConversationRecord = {
+        sessionId: sessionIdA,
+        projectHash: 'test-project-hash',
+        startTime: '2024-01-01T00:00:00Z',
+        lastUpdated: '2024-01-02T03:00:00Z',
+        messages: [recordA1, recordB2, compressionRecord],
+      };
+
+      const history = buildApiHistoryFromConversation(conversation);
+
+      // Thought parts should be preserved in compressed history by default.
+      // The compressedHistory has 2 entries (user, model), and no messages
+      // exist after the compression record, so the result is 2 items.
+      expect(history).toHaveLength(2);
+      expect(history[1].parts).toEqual([
+        { text: 'deep thinking', thought: true },
+        { text: 'final answer' },
+      ]);
+    });
   });
 });
diff --git a/packages/core/src/services/sessionService.ts b/packages/core/src/services/sessionService.ts
@@ -865,7 +865,9 @@ export interface BuildApiHistoryOptions {
   /**
    * Whether to strip thought parts from the history.
    * Thought parts are content parts that have `thought: true`.
-   * @default true
+   * Keeping thoughts ensures `reasoning_content` from reasoning models
+   * (e.g. DeepSeek) is properly passed back in subsequent API calls.
+   * @default false
    */
   stripThoughtsFromHistory?: boolean;
 }
@@ -906,7 +908,7 @@ export function buildApiHistoryFromConversation(
   conversation: ConversationRecord,
   options: BuildApiHistoryOptions = {},
 ): Content[] {
-  const { stripThoughtsFromHistory = true } = options;
+  const { stripThoughtsFromHistory = false } = options;
   const { messages } = conversation;
 
   let lastCompressionIndex = -1;
diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json
@@ -402,11 +402,6 @@
           "description": "Settings for clearing stale context after idle periods. Use -1 to disable a threshold.",
           "type": "object",
           "properties": {
-            "thinkingThresholdMinutes": {
-              "description": "Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.",
-              "type": "number",
-              "default": 5
-            },
             "toolResultsThresholdMinutes": {
               "description": "Minutes of inactivity before clearing old tool result content. Use -1 to disable.",
               "type": "number",

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -41,7 +41,6 @@ function makeModelMessage(text: string): Content {` |
| `41` | `41` | `}` |
| `42` | `42` | |
| `43` | `43` | `const DEFAULT_SETTINGS: ClearContextOnIdleSettings = {` |
| `44` | | `- thinkingThresholdMinutes: 5,` |
| `45` | `44` | `toolResultsThresholdMinutes: 5,` |
| `46` | `45` | `toolResultsNumToKeep: 1,` |
| `47` | `46` | `};` |