Skip to content

Commit 5d1052a

Browse files
doudouOUC and qwencoder authored
feat(telemetry): define HTTP OTLP endpoint behavior and signal routing (#3779)
* feat(telemetry): define HTTP OTLP endpoint behavior and signal routing

  - Add resolveHttpOtlpUrl() that appends /v1/traces, /v1/logs, /v1/metrics to base HTTP OTLP endpoints per the OpenTelemetry specification
  - Add per-signal endpoint overrides (otlpTracesEndpoint, otlpLogsEndpoint, otlpMetricsEndpoint) for backends with non-standard paths (e.g. Alibaba Cloud)
  - Add LogToSpanProcessor that bridges OTel log records to spans for traces-only backends, with session-based traceId correlation and error status propagation
  - Auto-wire LogToSpanProcessor when traces URL exists but logs URL doesn't
  - Validate per-signal URLs gracefully (log error + skip, don't crash)
  - Preserve query strings when appending signal paths to URLs
  - Guard gRPC branch against missing base endpoint with per-signal config
  - Update telemetry documentation with signal routing semantics and Alibaba Cloud HTTP per-signal endpoint examples

  Closes #3734

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(telemetry): fix TS noPropertyAccessFromIndexSignature errors in tests

  Use typed ExportedSpan interface and bracket notation for index signature properties to satisfy strict TypeScript checks in CI.

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(telemetry): replace MD5 with SHA-256 for traceId derivation

  CodeQL flagged MD5 as a weak cryptographic algorithm when used with session.id (considered sensitive data). Switch to SHA-256 truncated to 32 hex chars to satisfy CodeQL while maintaining the same traceId format required by the OTel specification.

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(telemetry): address review feedback for LogToSpanProcessor robustness

  - Wrap JSON.stringify in try/catch to handle circular refs and BigInt
  - Add export timeout (30s) and try/catch to prevent hung shutdown
  - Track in-flight exports to avoid interval-vs-shutdown race condition
  - Fix deriveSpanStatus: use truthy checks (!!), drop success===false heuristic since declined tool calls are normal, not errors
  - Enforce http(s) scheme in validateUrl to reject file:/javascript: URLs
  - Change DiagLogLevel from ERROR to WARN to preserve operational diagnostics
  - Preserve logRecord.instrumentationScope instead of hardcoding
  - Forward severityNumber/severityText as span attributes
  - Add tests for circular refs, error status edge cases, severity

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(telemetry): flush sdk shutdown through cleanup

  Remove async process exit handlers from telemetry initialization and route SDK shutdown through Config cleanup so normal CLI exit paths await pending telemetry exports. Keep shutdown idempotent while an SDK shutdown is in flight.

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(telemetry): harden bridged log shutdown

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(telemetry): address review follow-ups

  Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

---------

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
1 parent 35fe97e commit 5d1052a

11 files changed

Lines changed: 1387 additions & 102 deletions

File tree

docs/developers/development/telemetry.md

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,36 @@ observability framework — Qwen Code's observability system provides:
5858
All telemetry behavior is controlled through your `.qwen/settings.json` file.
5959
These settings can be overridden by environment variables or CLI flags.
6060

61-
| Setting | Environment Variable | CLI Flag | Description | Values | Default |
62-
| -------------- | ------------------------------ | -------------------------------------------------------- | ------------------------------------------------- | ----------------- | ----------------------- |
63-
| `enabled` | `QWEN_TELEMETRY_ENABLED` | `--telemetry` / `--no-telemetry` | Enable or disable telemetry | `true`/`false` | `false` |
64-
| `target` | `QWEN_TELEMETRY_TARGET` | `--telemetry-target <local\|gcp>` | Where to send telemetry data | `"gcp"`/`"local"` | `"local"` |
65-
| `otlpEndpoint` | `QWEN_TELEMETRY_OTLP_ENDPOINT` | `--telemetry-otlp-endpoint <URL>` | OTLP collector endpoint | URL string | `http://localhost:4317` |
66-
| `otlpProtocol` | `QWEN_TELEMETRY_OTLP_PROTOCOL` | `--telemetry-otlp-protocol <grpc\|http>` | OTLP transport protocol | `"grpc"`/`"http"` | `"grpc"` |
67-
| `outfile` | `QWEN_TELEMETRY_OUTFILE` | `--telemetry-outfile <path>` | Save telemetry to file (overrides `otlpEndpoint`) | file path | - |
68-
| `logPrompts` | `QWEN_TELEMETRY_LOG_PROMPTS` | `--telemetry-log-prompts` / `--no-telemetry-log-prompts` | Include prompts in telemetry logs | `true`/`false` | `true` |
69-
| `useCollector` | `QWEN_TELEMETRY_USE_COLLECTOR` | - | Use external OTLP collector (advanced) | `true`/`false` | `false` |
61+
| Setting | Environment Variable | CLI Flag | Description | Values | Default |
62+
| --------------------- | -------------------------------------- | -------------------------------------------------------- | ---------------------------------------------------- | ----------------- | ----------------------- |
63+
| `enabled` | `QWEN_TELEMETRY_ENABLED` | `--telemetry` / `--no-telemetry` | Enable or disable telemetry | `true`/`false` | `false` |
64+
| `target` | `QWEN_TELEMETRY_TARGET` | `--telemetry-target <local\|gcp>` | Where to send telemetry data | `"gcp"`/`"local"` | `"local"` |
65+
| `otlpEndpoint` | `QWEN_TELEMETRY_OTLP_ENDPOINT` | `--telemetry-otlp-endpoint <URL>` | OTLP collector endpoint | URL string | `http://localhost:4317` |
66+
| `otlpProtocol` | `QWEN_TELEMETRY_OTLP_PROTOCOL` | `--telemetry-otlp-protocol <grpc\|http>` | OTLP transport protocol | `"grpc"`/`"http"` | `"grpc"` |
67+
| `otlpTracesEndpoint` | `QWEN_TELEMETRY_OTLP_TRACES_ENDPOINT` | - | Per-signal endpoint override for traces (HTTP only) | URL string | - |
68+
| `otlpLogsEndpoint` | `QWEN_TELEMETRY_OTLP_LOGS_ENDPOINT` | - | Per-signal endpoint override for logs (HTTP only) | URL string | - |
69+
| `otlpMetricsEndpoint` | `QWEN_TELEMETRY_OTLP_METRICS_ENDPOINT` | - | Per-signal endpoint override for metrics (HTTP only) | URL string | - |
70+
| `outfile` | `QWEN_TELEMETRY_OUTFILE` | `--telemetry-outfile <path>` | Save telemetry to file (overrides `otlpEndpoint`) | file path | - |
71+
| `logPrompts` | `QWEN_TELEMETRY_LOG_PROMPTS` | `--telemetry-log-prompts` / `--no-telemetry-log-prompts` | Include prompts in telemetry logs | `true`/`false` | `true` |
72+
| `useCollector` | `QWEN_TELEMETRY_USE_COLLECTOR` | - | Use external OTLP collector (advanced) | `true`/`false` | `false` |
7073

7174
**Note on boolean environment variables:** For the boolean settings (`enabled`,
7275
`logPrompts`, `useCollector`), setting the corresponding environment variable to
7376
`true` or `1` will enable the feature. Any other value will disable it.
7477

78+
**HTTP OTLP signal routing:** When using HTTP protocol (`otlpProtocol: "http"`),
79+
Qwen Code automatically appends signal-specific paths (`/v1/traces`, `/v1/logs`,
80+
`/v1/metrics`) to the base `otlpEndpoint`. For example, `http://collector:4318`
81+
becomes `http://collector:4318/v1/traces` for traces. If the URL already ends
82+
with a signal path, it is used as-is. Per-signal endpoint overrides
83+
(`otlpTracesEndpoint`, etc.) take precedence over the base endpoint and are used
84+
verbatim. gRPC protocol uses service-based routing and does not append paths.
85+
86+
The per-signal endpoint environment variables also accept the standard
87+
OpenTelemetry names: `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`,
88+
`OTEL_EXPORTER_OTLP_LOGS_ENDPOINT`, `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`.
89+
The `QWEN_TELEMETRY_OTLP_*` variants take precedence over the `OTEL_*` variants.
90+
7591
For detailed information about all configuration options, see the
7692
[Configuration Guide](./cli/configuration.md).
7793

@@ -91,6 +107,9 @@ sent to Alibaba Cloud.
91107

92108
1. Enable telemetry in your `.qwen/settings.json` and set the OTLP
93109
endpoint:
110+
111+
**Option A: gRPC protocol** (standard OTLP endpoint):
112+
94113
```json
95114
{
96115
"telemetry": {
@@ -101,6 +120,29 @@ sent to Alibaba Cloud.
101120
}
102121
}
103122
```
123+
124+
**Option B: HTTP protocol with per-signal endpoints** (for backends
125+
that use non-standard paths, e.g., `/api/otlp/traces` instead of
126+
`/v1/traces`):
127+
128+
```json
129+
{
130+
"telemetry": {
131+
"enabled": true,
132+
"otlpProtocol": "http",
133+
"otlpTracesEndpoint": "http://<host>/<token>/api/otlp/traces",
134+
"otlpLogsEndpoint": "http://<host>/<token>/api/otlp/logs",
135+
"otlpMetricsEndpoint": "http://<host>/<token>/api/otlp/metrics"
136+
}
137+
}
138+
```
139+
140+
> **Note:** When using HTTP protocol with only `otlpEndpoint` (no
141+
> per-signal overrides), Qwen Code appends standard OTLP paths
142+
> (`/v1/traces`, `/v1/logs`, `/v1/metrics`) to the base URL. If your
143+
> backend uses different paths, use per-signal endpoint overrides as
144+
> shown in Option B.
145+
104146
2. If your Alibaba Cloud endpoint requires authentication, provide OTLP
105147
headers through standard OpenTelemetry environment variables such as
106148
`OTEL_EXPORTER_OTLP_HEADERS` (or the signal-specific variants). Qwen

packages/cli/src/gemini.test.tsx

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,11 +493,19 @@ describe('gemini.tsx main function kitty protocol', () => {
493493
let setRawModeSpy: MockInstance<
494494
(mode: boolean) => NodeJS.ReadStream & { fd: 0 }
495495
>;
496+
let initialSigintListeners: NodeJS.SignalsListener[];
497+
let initialSigtermListeners: NodeJS.SignalsListener[];
496498

497499
beforeEach(() => {
498500
// Set no relaunch in tests since process spawning causing issues in tests
499501
originalEnvNoRelaunch = process.env['QWEN_CODE_NO_RELAUNCH'];
500502
process.env['QWEN_CODE_NO_RELAUNCH'] = 'true';
503+
initialSigintListeners = process.listeners(
504+
'SIGINT',
505+
) as NodeJS.SignalsListener[];
506+
initialSigtermListeners = process.listeners(
507+
'SIGTERM',
508+
) as NodeJS.SignalsListener[];
501509

502510
// eslint-disable-next-line @typescript-eslint/no-explicit-any
503511
if (!(process.stdin as any).setRawMode) {
@@ -517,12 +525,24 @@ describe('gemini.tsx main function kitty protocol', () => {
517525
});
518526

519527
afterEach(() => {
528+
for (const listener of process.listeners('SIGINT')) {
529+
if (!initialSigintListeners.includes(listener)) {
530+
process.removeListener('SIGINT', listener as NodeJS.SignalsListener);
531+
}
532+
}
533+
for (const listener of process.listeners('SIGTERM')) {
534+
if (!initialSigtermListeners.includes(listener)) {
535+
process.removeListener('SIGTERM', listener as NodeJS.SignalsListener);
536+
}
537+
}
538+
520539
// Restore original env variables
521540
if (originalEnvNoRelaunch !== undefined) {
522541
process.env['QWEN_CODE_NO_RELAUNCH'] = originalEnvNoRelaunch;
523542
} else {
524543
delete process.env['QWEN_CODE_NO_RELAUNCH'];
525544
}
545+
vi.restoreAllMocks();
526546
});
527547

528548
it('should call setRawMode and detectAndEnableKittyProtocol when isInteractive is true', async () => {
@@ -618,6 +638,79 @@ describe('gemini.tsx main function kitty protocol', () => {
618638
expect(setRawModeSpy).toHaveBeenCalledWith(true);
619639
expect(detectAndEnableKittyProtocol).toHaveBeenCalledTimes(1);
620640
});
641+
642+
it('should run cleanup before exiting on interactive SIGINT', async () => {
643+
const { loadCliConfig, parseArguments } = await import(
644+
'./config/config.js'
645+
);
646+
const { loadSettings } = await import('./config/settings.js');
647+
const cleanupModule = await import('./utils/cleanup.js');
648+
const signalHandlers = new Map<string, (...args: unknown[]) => void>();
649+
const processOnceSpy = vi.spyOn(process, 'once').mockImplementation(((
650+
eventName: string | symbol,
651+
listener: (...args: unknown[]) => void,
652+
) => {
653+
if (eventName === 'SIGTERM' || eventName === 'SIGINT') {
654+
signalHandlers.set(eventName, listener);
655+
}
656+
return process;
657+
}) as typeof process.once);
658+
const processExitSpy = vi
659+
.spyOn(process, 'exit')
660+
.mockImplementation((() => undefined) as unknown as typeof process.exit);
661+
const runExitCleanupMock = vi.mocked(cleanupModule.runExitCleanup);
662+
runExitCleanupMock.mockResolvedValue(undefined);
663+
664+
vi.mocked(loadCliConfig).mockResolvedValue({
665+
isInteractive: () => true,
666+
getQuestion: () => '',
667+
getSandbox: () => false,
668+
getDebugMode: () => false,
669+
getListExtensions: () => false,
670+
getMcpServers: () => ({}),
671+
initialize: vi.fn(),
672+
getIdeMode: () => false,
673+
getExperimentalZedIntegration: () => false,
674+
getScreenReader: () => false,
675+
getGeminiMdFileCount: () => 0,
676+
getWarnings: () => [],
677+
getModelsConfig: () => ({
678+
getCurrentAuthType: () => null,
679+
getGenerationConfig: () => ({}),
680+
}),
681+
getProxy: () => undefined,
682+
getUsageStatisticsEnabled: () => true,
683+
getSessionId: () => 'test-session-id',
684+
} as unknown as Config);
685+
vi.mocked(loadSettings).mockReturnValue({
686+
errors: [],
687+
merged: {
688+
advanced: {},
689+
security: { auth: {} },
690+
ui: {},
691+
},
692+
setValue: vi.fn(),
693+
forScope: () => ({ settings: {}, originalSettings: {}, path: '' }),
694+
migrationWarnings: [],
695+
getUserHooks: () => undefined,
696+
getProjectHooks: () => undefined,
697+
} as never);
698+
vi.mocked(parseArguments).mockResolvedValue({
699+
extensions: undefined,
700+
} as never);
701+
702+
await main();
703+
signalHandlers.get('SIGINT')?.();
704+
await Promise.resolve();
705+
await Promise.resolve();
706+
707+
expect(setRawModeSpy).toHaveBeenCalledWith(false);
708+
expect(runExitCleanupMock).toHaveBeenCalledTimes(1);
709+
expect(processExitSpy).toHaveBeenCalledWith(130);
710+
711+
processOnceSpy.mockRestore();
712+
processExitSpy.mockRestore();
713+
});
621714
});
622715

623716
describe('validateDnsResolutionOrder', () => {

packages/cli/src/gemini.tsx

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,48 @@ ${reason.stack}`
164164
});
165165
}
166166

167+
function getSignalExitCode(signal: NodeJS.Signals): number {
168+
return signal === 'SIGINT' ? 130 : 143;
169+
}
170+
171+
function installInteractiveSignalHandlers(wasRaw: boolean): () => void {
172+
let cleanupStarted = false;
173+
174+
const handleSignal = (signal: NodeJS.Signals) => {
175+
if (process.stdin.isTTY) {
176+
process.stdin.setRawMode(wasRaw);
177+
}
178+
179+
if (cleanupStarted) {
180+
return;
181+
}
182+
cleanupStarted = true;
183+
184+
void runExitCleanup()
185+
.catch((error) => {
186+
debugLogger.error(`Error during ${signal} cleanup:`, error);
187+
})
188+
.finally(() => {
189+
process.exit(getSignalExitCode(signal));
190+
});
191+
};
192+
193+
const handleSigterm = () => {
194+
handleSignal('SIGTERM');
195+
};
196+
const handleSigint = () => {
197+
handleSignal('SIGINT');
198+
};
199+
200+
process.once('SIGTERM', handleSigterm);
201+
process.once('SIGINT', handleSigint);
202+
203+
return () => {
204+
process.removeListener('SIGTERM', handleSigterm);
205+
process.removeListener('SIGINT', handleSigint);
206+
};
207+
}
208+
167209
export async function startInteractiveUI(
168210
config: Config,
169211
settings: LoadedSettings,
@@ -559,6 +601,9 @@ export async function main() {
559601
const wasRaw = process.stdin.isRaw;
560602
let kittyProtocolDetectionComplete: Promise<boolean> | undefined;
561603
let themeAutoDetectionComplete: Promise<void> | undefined;
604+
if (config.isInteractive()) {
605+
registerCleanup(installInteractiveSignalHandlers(wasRaw));
606+
}
562607
if (config.isInteractive() && !wasRaw && process.stdin.isTTY) {
563608
// Set this as early as possible to avoid spurious characters from
564609
// input showing up in the output.
@@ -569,14 +614,6 @@ export async function main() {
569614
// Ensure the stdin listener is removed on any exit path (error, signal, etc.)
570615
registerCleanup(() => stopAndGetCapturedInput());
571616

572-
// This cleanup isn't strictly needed but may help in certain situations.
573-
process.on('SIGTERM', () => {
574-
process.stdin.setRawMode(wasRaw);
575-
});
576-
process.on('SIGINT', () => {
577-
process.stdin.setRawMode(wasRaw);
578-
});
579-
580617
// Detect and enable Kitty keyboard protocol once at startup.
581618
kittyProtocolDetectionComplete = detectAndEnableKittyProtocol();
582619

packages/core/src/config/config.test.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import {
1515
DEFAULT_TELEMETRY_TARGET,
1616
DEFAULT_OTLP_ENDPOINT,
1717
QwenLogger,
18+
isTelemetrySdkInitialized,
19+
shutdownTelemetry,
1820
} from '../telemetry/index.js';
1921
import type {
2022
ContentGenerator,
@@ -177,6 +179,8 @@ vi.mock('../telemetry/index.js', async (importOriginal) => {
177179
return {
178180
...actual,
179181
initializeTelemetry: vi.fn(),
182+
isTelemetrySdkInitialized: vi.fn(() => false),
183+
shutdownTelemetry: vi.fn().mockResolvedValue(undefined),
180184
uiTelemetryService: {
181185
getLastPromptTokenCount: vi.fn(),
182186
},
@@ -275,6 +279,7 @@ describe('Server Config (config.ts)', () => {
275279
beforeEach(() => {
276280
// Reset mocks if necessary
277281
vi.clearAllMocks();
282+
vi.mocked(isTelemetrySdkInitialized).mockReturnValue(false);
278283
vi.spyOn(QwenLogger.prototype, 'logStartSessionEvent').mockImplementation(
279284
async () => undefined,
280285
);
@@ -908,6 +913,32 @@ describe('Server Config (config.ts)', () => {
908913
expect(config.getTelemetryEnabled()).toBe(true);
909914
});
910915

916+
it('Config shutdown should flush telemetry when SDK is initialized', async () => {
917+
const paramsWithTelemetry: ConfigParameters = {
918+
...baseParams,
919+
telemetry: { enabled: true },
920+
};
921+
vi.mocked(isTelemetrySdkInitialized).mockReturnValue(true);
922+
const config = new Config(paramsWithTelemetry);
923+
924+
await config.shutdown();
925+
926+
expect(shutdownTelemetry).toHaveBeenCalledTimes(1);
927+
});
928+
929+
it('Config shutdown should skip telemetry shutdown before SDK initialization', async () => {
930+
const paramsWithTelemetry: ConfigParameters = {
931+
...baseParams,
932+
telemetry: { enabled: true },
933+
};
934+
vi.mocked(isTelemetrySdkInitialized).mockReturnValue(false);
935+
const config = new Config(paramsWithTelemetry);
936+
937+
await config.shutdown();
938+
939+
expect(shutdownTelemetry).not.toHaveBeenCalled();
940+
});
941+
911942
it('Config constructor should set telemetry to false when provided as false', () => {
912943
const paramsWithTelemetry: ConfigParameters = {
913944
...baseParams,
@@ -1083,6 +1114,41 @@ describe('Server Config (config.ts)', () => {
10831114
});
10841115
});
10851116

1117+
describe('Per-Signal OTLP Endpoint Configuration', () => {
1118+
it('should return per-signal endpoints when provided', () => {
1119+
const params: ConfigParameters = {
1120+
...baseParams,
1121+
telemetry: {
1122+
enabled: true,
1123+
otlpTracesEndpoint: 'http://traces:4318/v1/traces',
1124+
otlpLogsEndpoint: 'http://logs:4318/v1/logs',
1125+
otlpMetricsEndpoint: 'http://metrics:4318/v1/metrics',
1126+
},
1127+
};
1128+
const config = new Config(params);
1129+
expect(config.getTelemetryOtlpTracesEndpoint()).toBe(
1130+
'http://traces:4318/v1/traces',
1131+
);
1132+
expect(config.getTelemetryOtlpLogsEndpoint()).toBe(
1133+
'http://logs:4318/v1/logs',
1134+
);
1135+
expect(config.getTelemetryOtlpMetricsEndpoint()).toBe(
1136+
'http://metrics:4318/v1/metrics',
1137+
);
1138+
});
1139+
1140+
it('should return undefined when per-signal endpoints are not provided', () => {
1141+
const params: ConfigParameters = {
1142+
...baseParams,
1143+
telemetry: { enabled: true },
1144+
};
1145+
const config = new Config(params);
1146+
expect(config.getTelemetryOtlpTracesEndpoint()).toBeUndefined();
1147+
expect(config.getTelemetryOtlpLogsEndpoint()).toBeUndefined();
1148+
expect(config.getTelemetryOtlpMetricsEndpoint()).toBeUndefined();
1149+
});
1150+
});
1151+
10861152
describe('UseRipgrep Configuration', () => {
10871153
it('should default useRipgrep to true when not provided', () => {
10881154
const config = new Config(baseParams);

0 commit comments

Comments
 (0)