[Evaluation] Recover partial red team results when Foundry execution raises (#45541)

slister1001 · Copilot · web-flow · commit fb6b3cade77d · 2026-03-09T18:05:24.000-04:00
* [Evaluation] Recover partial red team results when Foundry execution raises

When orchestrator.execute() raises (e.g., ConnectTimeout on 1 of 50
objectives), attempt to recover partial results from the orchestrator
before falling back to the empty-result error path.

Previously, any single objective failure caused the entire risk category's
results to be discarded (data_file set to empty string, 0 results returned).
Now, completed objectives are processed through the normal
FoundryResultProcessor pipeline and included in the final output.

The error is demoted from ERROR to WARNING when partial results are
available, since it is not a total failure. The original full-failure
path is preserved when get_attack_results() returns empty.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address review comments: add debug logging, structured partial_failure info

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Apply black formatting

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py
@@ -162,19 +162,45 @@ async def execute_attacks(
                         include_baseline=include_baseline,
                     )
                 except Exception as e:
-                    self.logger.error(f"Error executing attacks for {risk_value}: {e}")
-                    # Use "Foundry" as fallback strategy name to match expected structure
-                    if "Foundry" not in red_team_info:
-                        red_team_info["Foundry"] = {}
-                    red_team_info["Foundry"][risk_value] = {
-                        "data_file": "",
-                        "status": "failed",
-                        "error": str(e),
-                        "asr": 0.0,
-                    }
-                    continue
+                    # Attempt to recover partial results before giving up.
+                    # partial_results is used only as a truthiness check here;
+                    # FoundryResultProcessor re-retrieves results via orchestrator.get_attack_results().
+                    partial_results = []
+                    try:
+                        partial_results = orchestrator.get_attack_results()
+                    except Exception:
+                        self.logger.debug("Failed to recover partial results for %s", risk_value, exc_info=True)
+
+                    if partial_results:
+                        self.logger.warning(
+                            f"Partial failure executing attacks for {risk_value}: {e}. "
+                            f"Recovered {len(partial_results)} partial results."
+                        )
+                        # Record partial failure in structured output so callers
+                        # relying on red_team_info can observe it.
+                        if "Foundry" not in red_team_info:
+                            red_team_info["Foundry"] = {}
+                        red_team_info["Foundry"][risk_value] = {
+                            "data_file": "",
+                            "status": "partial_failure",
+                            "error": str(e),
+                            "partial_failure": True,
+                            "asr": 0.0,
+                        }
+                    else:
+                        self.logger.error(f"Error executing attacks for {risk_value}: {e}")
+                        # No results recoverable — use empty fallback
+                        if "Foundry" not in red_team_info:
+                            red_team_info["Foundry"] = {}
+                        red_team_info["Foundry"][risk_value] = {
+                            "data_file": "",
+                            "status": "failed",
+                            "error": str(e),
+                            "asr": 0.0,
+                        }
+                        continue
 
-                # Process results
+                # Process results (handles both full success and partial recovery)
                 result_processor = FoundryResultProcessor(
                     scenario=orchestrator,
                     dataset_config=dataset_config,