Skip to content

Commit 230959e

Browse files
authored
feat: Add autobatch tracing mode (#134)
* Add autobatch mode * Fix * Update signature * Add async impl * Fix optin * Fix * Fix * Fix tests * Add test * Refactor to be more idiomatic * Feedback * Use explicit lock, add to AGENTS.md * Remove test script
1 parent c3ab60b commit 230959e

18 files changed

Lines changed: 946 additions & 377 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ Tests skip gracefully via `assumeTrue` if keys are missing.
229229

230230
## Code style
231231

232+
- For cross-method concurrency coordination, prefer an explicit named `ReentrantLock` with `lock()` / `try` / `finally { unlock() }` over `synchronized` when review clarity matters. Keep the locked section minimal and do slow/blocking work outside the lock.
232233
- `toString()` should be single-line, following the `ClassName{field=value, field=value}` convention used by the rest of the SDK.
233234
- Avoid `@Suppress("UNCHECKED_CAST")` — restructure code to use safe patterns (`as? String`, `is Map<*, *>` with `entries.associate`, etc). When unavoidable (e.g. generic type erasure after an `is` check), add a comment explaining why the cast is safe.
234235
- Use named arguments for constructor/function calls with 2+ parameters, especially when types could be confused:

langsmith-java-client-okhttp/src/main/kotlin/com/langchain/smith/client/okhttp/LangsmithOkHttpClient.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,16 @@ class LangsmithOkHttpClient private constructor() {
271271
*/
272272
fun maxRetries(maxRetries: Int) = apply { clientOptions.maxRetries(maxRetries) }
273273

274+
/**
 * Whether run create/update calls should be automatically batched for tracing.
 *
 * Defaults to true. Set to false to send run create/update calls synchronously through the
 * single-run endpoints.
 *
 * Forwards the flag to the underlying client options; returns this builder for chaining.
 */
fun autoBatchTracing(autoBatchTracing: Boolean) = apply {
    clientOptions.autoBatchTracing(autoBatchTracing)
}
283+
274284
fun apiKey(apiKey: String?) = apply { clientOptions.apiKey(apiKey) }
275285

276286
/** Alias for calling [Builder.apiKey] with `apiKey.orElse(null)`. */

langsmith-java-client-okhttp/src/main/kotlin/com/langchain/smith/client/okhttp/LangsmithOkHttpClientAsync.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,16 @@ class LangsmithOkHttpClientAsync private constructor() {
271271
*/
272272
fun maxRetries(maxRetries: Int) = apply { clientOptions.maxRetries(maxRetries) }
273273

274+
/**
 * Whether run create/update calls should be automatically batched for tracing.
 *
 * Defaults to true. Set to false to send run create/update calls through the single-run
 * endpoints.
 *
 * Forwards the flag to the underlying client options; returns this builder for chaining.
 */
fun autoBatchTracing(autoBatchTracing: Boolean) = apply {
    clientOptions.autoBatchTracing(autoBatchTracing)
}
283+
274284
fun apiKey(apiKey: String?) = apply { clientOptions.apiKey(apiKey) }
275285

276286
/** Alias for calling [Builder.apiKey] with `apiKey.orElse(null)`. */
Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
package com.langchain.smith.client
2+
3+
import com.langchain.smith.core.RequestOptions
4+
import com.langchain.smith.core.Timeout
5+
import com.langchain.smith.core.http.Headers
6+
import com.langchain.smith.core.http.QueryParams
7+
import com.langchain.smith.models.runs.Run
8+
import com.langchain.smith.models.runs.RunIngestBatchParams
9+
import java.util.concurrent.CompletionException
10+
import java.util.concurrent.CompletionStage
11+
import java.util.concurrent.ConcurrentLinkedQueue
12+
import java.util.concurrent.ExecutionException
13+
import java.util.concurrent.ExecutorService
14+
import java.util.concurrent.Executors
15+
import java.util.concurrent.Phaser
16+
import java.util.concurrent.RejectedExecutionException
17+
import java.util.concurrent.ScheduledExecutorService
18+
import java.util.concurrent.TimeUnit
19+
import java.util.concurrent.atomic.AtomicBoolean
20+
import java.util.concurrent.atomic.AtomicInteger
21+
import java.util.concurrent.locks.ReentrantLock
22+
import org.slf4j.LoggerFactory
23+
24+
/**
 * Batches run create/update operations and sends them to LangSmith in a single `ingestBatch`
 * request, reducing HTTP overhead.
 *
 * Operations are buffered and flushed either:
 * - When the buffer reaches [batchSizeLimit] operations
 * - After [aggregationDelayMs] milliseconds of inactivity (timer-based drain)
 * - When [flush] is called explicitly
 *
 * Thread-safety: [post], [patch], [flush], and [shutdown] may be called from any thread. All
 * draining is serialized on a single coordinator thread; batch sends run on a bounded pool of
 * [sendParallelism] threads. Send failures are logged and dropped, never rethrown to callers.
 *
 * @param sendBatch sends a batch and completes when the send has finished
 * @param batchSizeLimit max operations before auto-flush (default 100)
 * @param aggregationDelayMs delay before timer-based flush (default 250ms)
 * @param sendParallelism max number of batch requests to send concurrently (default 4)
 */
class AutoBatchQueue(
    private val sendBatch: (RunIngestBatchParams, RequestOptions) -> CompletionStage<Void?>,
    private val batchSizeLimit: Int = DEFAULT_BATCH_SIZE_LIMIT,
    private val aggregationDelayMs: Long = DEFAULT_AGGREGATION_DELAY_MS,
    private val sendParallelism: Int = DEFAULT_SEND_PARALLELISM,
) {
    // FIFO buffer of pending operations; only drained on the coordinator thread.
    private val items = ConcurrentLinkedQueue<BatchItem>()
    // Tracked separately because ConcurrentLinkedQueue.size() is O(n).
    private val queuedCount = AtomicInteger(0)
    private val shutdown = AtomicBoolean(false)
    // Ensures at most one pending timer-based flush is scheduled at a time.
    private val delayedFlushScheduled = AtomicBoolean(false)
    // Serializes enqueue's shutdown-check-and-add against shutdown's flag flip (see shutdown()).
    private val enqueueShutdownLock = ReentrantLock()
    // Counts in-flight batch sends. onAdvance returns false so the phaser never terminates when
    // registered parties drop to zero, allowing it to be reused across flush cycles.
    private val activeSends =
        object : Phaser(0) {
            override fun onAdvance(phase: Int, registeredParties: Int): Boolean = false
        }

    // Single daemon thread that owns all drain work and the delayed-flush timer.
    private val coordinator: ScheduledExecutorService =
        Executors.newSingleThreadScheduledExecutor { r ->
            Thread(r, "langsmith-batch-coordinator").apply { isDaemon = true }
        }

    // Daemon pool that performs the (potentially slow) batch HTTP sends.
    private val sendExecutor: ExecutorService =
        Executors.newFixedThreadPool(sendParallelism) { r ->
            Thread(r, "langsmith-batch-sender").apply { isDaemon = true }
        }

    /** Enqueues a run create operation. */
    fun post(
        run: Run,
        headers: Headers = Headers.builder().build(),
        queryParams: QueryParams = QueryParams.builder().build(),
        requestOptions: RequestOptions = RequestOptions.none(),
    ) {
        enqueue(BatchOp.Post, run, headers, queryParams, requestOptions)
    }

    /** Enqueues a run update (patch) operation. */
    fun patch(
        run: Run,
        headers: Headers = Headers.builder().build(),
        queryParams: QueryParams = QueryParams.builder().build(),
        requestOptions: RequestOptions = RequestOptions.none(),
    ) {
        enqueue(BatchOp.Patch, run, headers, queryParams, requestOptions)
    }

    /**
     * Flushes all queued operations immediately, blocking until batch requests that were queued or
     * already in-flight have completed.
     *
     * Safe to call from any thread. No-op if the queue is empty.
     */
    fun flush() {
        // Loop until both the buffer is empty and no sends are in flight, because new items may
        // be enqueued (or new sends started) while we wait. Returns early if interrupted.
        while (true) {
            // Drain any queued items on the coordinator thread; bail if interrupted.
            if (queuedCount.get() > 0 && !drainOnCoordinator()) {
                return
            }

            // Wait for in-flight sends to finish; bail if interrupted.
            if (!waitForActiveSends()) {
                return
            }

            // Only done once both checks pass with nothing left; otherwise re-loop.
            if (queuedCount.get() == 0 && !hasActiveSends()) {
                return
            }
        }
    }

    /**
     * Flushes remaining operations and shuts down the background executors.
     *
     * After calling this, the queue will no longer accept new operations.
     */
    fun shutdown() {
        enqueueShutdownLock.lock()
        try {
            // Serialize with enqueue's check-and-add so flush cannot miss an item that observed
            // shutdown=false but has not yet been queued.
            if (!shutdown.compareAndSet(false, true)) return
        } finally {
            enqueueShutdownLock.unlock()
        }

        // Flush before stopping executors so already-accepted items still get sent.
        flush()
        coordinator.shutdown()
        sendExecutor.shutdown()

        try {
            if (!coordinator.awaitTermination(5, TimeUnit.SECONDS)) {
                coordinator.shutdownNow()
            }
            if (!sendExecutor.awaitTermination(5, TimeUnit.SECONDS)) {
                sendExecutor.shutdownNow()
            }
        } catch (_: InterruptedException) {
            // Interrupted while waiting: force-stop both pools and restore the interrupt flag.
            coordinator.shutdownNow()
            sendExecutor.shutdownNow()
            Thread.currentThread().interrupt()
        }
    }

    /** Returns the number of queued operations (for testing). */
    internal fun size(): Int = queuedCount.get()

    /**
     * Adds one operation to the buffer and triggers/schedules a flush based on the new count.
     *
     * @throws IllegalStateException if the queue has been shut down
     */
    private fun enqueue(
        op: BatchOp,
        run: Run,
        headers: Headers,
        queryParams: QueryParams,
        requestOptions: RequestOptions,
    ) {
        // Note: this `run { }` is the stdlib scope function — the `run` parameter (a Run model)
        // is not callable, so it does not shadow the call. The lambda yields the post-add count.
        val count = run {
            enqueueShutdownLock.lock()
            try {
                check(!shutdown.get()) { "AutoBatchQueue is shut down" }
                items.add(
                    BatchItem(
                        op = op,
                        run = run,
                        headers = headers,
                        queryParams = queryParams,
                        requestOptions = requestOptions,
                    )
                )
                queuedCount.incrementAndGet()
            } finally {
                enqueueShutdownLock.unlock()
            }
        }

        // Flush decisions happen outside the lock to keep the locked section minimal.
        afterEnqueue(count)
    }

    private fun afterEnqueue(count: Int) {
        if (count >= batchSizeLimit) {
            // Buffer is full: drain as soon as the coordinator is free.
            triggerFlush()
        } else {
            // Otherwise (re)arm the inactivity timer.
            scheduleFlush()
        }
    }

    /** Arms a one-shot delayed drain, unless one is already pending. */
    private fun scheduleFlush() {
        // CAS guarantees at most one timer is outstanding at any moment.
        if (!delayedFlushScheduled.compareAndSet(false, true)) return

        try {
            coordinator.schedule(
                {
                    // Clear the flag before draining so a concurrent enqueue can arm a new timer
                    // for items added after this drain starts.
                    delayedFlushScheduled.set(false)
                    drainAndSubmitSends()
                },
                aggregationDelayMs,
                TimeUnit.MILLISECONDS,
            )
        } catch (e: RejectedExecutionException) {
            // Coordinator is shutting down; undo the flag so state stays consistent.
            delayedFlushScheduled.set(false)
            logger.warn("Batch queue coordinator rejected delayed flush", e)
        }
    }

    /** Asks the coordinator to drain immediately (fire-and-forget). */
    private fun triggerFlush() {
        try {
            coordinator.execute { drainAndSubmitSends() }
        } catch (e: RejectedExecutionException) {
            logger.warn("Batch queue coordinator rejected flush", e)
        }
    }

    /**
     * Runs one drain pass on the coordinator thread and blocks until it completes.
     *
     * Returns false if interrupted (with the interrupt flag restored); true otherwise.
     */
    private fun drainOnCoordinator(): Boolean {
        val drainFuture =
            try {
                coordinator.submit { drainAndSubmitSends() }
            } catch (e: RejectedExecutionException) {
                throw IllegalStateException("Batch queue coordinator rejected flush", e)
            }

        try {
            drainFuture.get()
            return true
        } catch (_: InterruptedException) {
            Thread.currentThread().interrupt()
            return false
        } catch (e: ExecutionException) {
            // Unwrap so callers see the drain's own failure, not the future wrapper.
            throw RuntimeException("Failed to flush batch queue", e.cause)
        }
    }

    /**
     * Drains up to one batch worth of items, submits the resulting batches, and re-arms follow-up
     * drains if items remain. Always runs on the coordinator thread.
     */
    private fun drainAndSubmitSends() {
        val batches = drainUpTo(batchSizeLimit)
        if (batches.isEmpty()) return

        batches.forEach(::submitBatch)

        when {
            // Another full batch is already waiting: drain again without delay.
            queuedCount.get() >= batchSizeLimit -> triggerFlush()
            // Partial leftovers during normal operation: wait for more to aggregate.
            queuedCount.get() > 0 && !shutdown.get() -> scheduleFlush()
            // Partial leftovers during shutdown: no point waiting, drain now.
            queuedCount.get() > 0 && shutdown.get() -> triggerFlush()
        }
    }

    /**
     * Drains up to [maxItems] queued operations and returns batch params grouped by request
     * options.
     *
     * TODO: Merge create + update for the same run ID before sending (like the JS/Python SDKs).
     * This would reduce the number of operations in each batch when a run is created and
     * immediately updated (common for short-lived runs).
     * TODO: Also flush/split batches based on serialized payload size, not just operation count.
     * TODO: Support multipart ingest endpoint for large payloads with attachments.
     * TODO: Support gzip compression for batch requests.
     */
    private fun drainUpTo(maxItems: Int): List<Batch> {
        // LinkedHashMap keeps groups in first-seen order, preserving rough FIFO across batches.
        val groups = linkedMapOf<RequestOptionsKey, BatchGroup>()
        var drained = 0

        while (drained < maxItems) {
            val item = items.poll() ?: break
            queuedCount.decrementAndGet()
            drained++

            // Items sharing (responseValidation, timeout) are sent in one batch request; their
            // headers and query params are merged into the group's builders.
            val group =
                groups.getOrPut(item.requestOptions.key()) { BatchGroup(item.requestOptions) }
            when (item.op) {
                BatchOp.Post -> group.posts.add(item.run)
                BatchOp.Patch -> group.patches.add(item.run)
            }
            group.headers.putAll(item.headers)
            group.queryParams.putAll(item.queryParams)
        }

        return groups.values.map { it.toBatch() }
    }

    /**
     * Hands one batch to the send pool, tracking it in [activeSends] for the duration of the send.
     * Failures are logged and swallowed so one bad batch never breaks the queue.
     */
    private fun submitBatch(batch: Batch) {
        // Register BEFORE submitting so flush() can never observe "no active sends" while a
        // just-submitted batch has not started yet.
        activeSends.register()
        try {
            sendExecutor.execute {
                try {
                    sendBatch(batch.params, batch.requestOptions).toCompletableFuture().join()
                } catch (e: CompletionException) {
                    // join() wraps async failures; log the underlying cause when present.
                    logger.warn("Failed to send batch of runs", e.cause ?: e)
                } catch (e: Exception) {
                    logger.warn("Failed to send batch of runs", e)
                } finally {
                    activeSends.arriveAndDeregister()
                }
            }
        } catch (e: RejectedExecutionException) {
            // The task never ran, so balance the register() here instead.
            activeSends.arriveAndDeregister()
            logger.warn(
                "Batch queue sender rejected a batch; dropping {} run operations",
                operationCount(batch.params),
                e,
            )
        }
    }

    /**
     * Blocks until no sends are registered on [activeSends].
     *
     * Returns false if interrupted (with the interrupt flag restored); true otherwise.
     */
    private fun waitForActiveSends(): Boolean {
        while (hasActiveSends()) {
            // Snapshot the phase, then wait for it to advance (i.e. current parties to arrive).
            // If the phase already advanced between the two calls, awaitAdvance returns
            // immediately and the loop re-checks.
            val phase = activeSends.phase
            try {
                activeSends.awaitAdvanceInterruptibly(phase)
            } catch (_: InterruptedException) {
                Thread.currentThread().interrupt()
                return false
            }
        }
        return true
    }

    private fun hasActiveSends(): Boolean = activeSends.registeredParties > 0

    // Total number of run operations (creates + updates) carried by a batch, for logging.
    private fun operationCount(params: RunIngestBatchParams): Int =
        params.post().orElse(emptyList()).size + params.patch().orElse(emptyList()).size

    private enum class BatchOp {
        Post,
        Patch,
    }

    private data class Batch(val params: RunIngestBatchParams, val requestOptions: RequestOptions)

    // Mutable accumulator for one request-options group while draining; see drainUpTo.
    private data class BatchGroup(
        val requestOptions: RequestOptions,
        val posts: MutableList<Run> = mutableListOf(),
        val patches: MutableList<Run> = mutableListOf(),
        val headers: Headers.Builder = Headers.builder(),
        val queryParams: QueryParams.Builder = QueryParams.builder(),
    ) {
        fun toBatch(): Batch {
            val builder = RunIngestBatchParams.builder()
            if (posts.isNotEmpty()) builder.post(posts)
            if (patches.isNotEmpty()) builder.patch(patches)
            builder.additionalHeaders(headers.build())
            builder.additionalQueryParams(queryParams.build())
            return Batch(params = builder.build(), requestOptions = requestOptions)
        }
    }

    private data class BatchItem(
        val op: BatchOp,
        val run: Run,
        val headers: Headers,
        val queryParams: QueryParams,
        val requestOptions: RequestOptions,
    )

    // Grouping key: only these two RequestOptions fields decide batch compatibility.
    private data class RequestOptionsKey(val responseValidation: Boolean?, val timeout: Timeout?)

    private fun RequestOptions.key(): RequestOptionsKey =
        RequestOptionsKey(responseValidation = responseValidation, timeout = timeout)

    companion object {
        private val logger = LoggerFactory.getLogger(AutoBatchQueue::class.java)

        const val DEFAULT_BATCH_SIZE_LIMIT = 100
        const val DEFAULT_AGGREGATION_DELAY_MS = 250L
        const val DEFAULT_SEND_PARALLELISM = 4
    }
}

0 commit comments

Comments
 (0)