diff --git a/requirements.txt b/requirements.txt index e96fa61ee..aa528c18a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ pyyaml>=6 pandas gradio gitpython # for the reproducibility script -requests \ No newline at end of file +requests +matplotlib \ No newline at end of file diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py index 228901b39..8c43ef046 100644 --- a/src/agentlab/analyze/agent_xray.py +++ b/src/agentlab/analyze/agent_xray.py @@ -482,7 +482,8 @@ def run_gradio(results_dir: Path): tabs.select(tab_select) demo.queue() - demo.launch(server_port=int(os.getenv("AGENTXRAY_APP_PORT", 7899)), share=True) + do_share = os.getenv("AGENTXRAY_SHARE_SERVER", "false").lower() == "true" + demo.launch(server_port=int(os.getenv("AGENTXRAY_APP_PORT", 7899)), share=do_share) def tab_select(evt: gr.SelectData): diff --git a/src/agentlab/analyze/inspect_results.py b/src/agentlab/analyze/inspect_results.py index 7d46113c9..d33cc756e 100644 --- a/src/agentlab/analyze/inspect_results.py +++ b/src/agentlab/analyze/inspect_results.py @@ -245,9 +245,9 @@ def get_std_err(df, metric): if np.all(np.isin(data, [0, 1])): mean = np.mean(data) std_err = np.sqrt(mean * (1 - mean) / len(data)) + return mean, std_err else: return get_sample_std_err(df, metric) - return mean, std_err def get_sample_std_err(df, metric): @@ -258,7 +258,7 @@ def get_sample_std_err(df, metric): mean = np.mean(data) std_err = np.std(data, ddof=1) / np.sqrt(len(data)) if np.isnan(std_err): - std_err = 0 + std_err = np.float64(0) return mean, std_err