Skip to content

Commit 209097d

Browse files
committed
fix: handle lazy AnnData obs conversion in query operations
When lazy AnnData objects (from anndata.experimental.read_lazy) are subset, their obs attribute is a Dataset2D object, not a pandas DataFrame. Calling pd.DataFrame(table.obs) on such an object produces a malformed DataFrame. This fix uses table.obs.to_memory() for lazy tables, which properly converts the Dataset2D to a DataFrame while preserving all column data.

Files modified:
- relational_query.py: _filter_table_by_element_names, _filter_table_by_elements, get_values
- _utils.py: _inplace_fix_subset_categorical_obs
1 parent 120d11d commit 209097d

2 files changed

Lines changed: 34 additions & 5 deletions

File tree

src/spatialdata/_core/query/relational_query.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,14 @@ def _filter_table_by_element_names(table: AnnData | None, element_names: str | l
7575
return None
7676
table_mapping_metadata = table.uns[TableModel.ATTRS_KEY]
7777
region_key = table_mapping_metadata[TableModel.REGION_KEY_KEY]
78-
table.obs = pd.DataFrame(table.obs)
78+
# Filter first, then materialize obs to avoid shape mismatch with lazy tables
7979
table = table[table.obs[region_key].isin(element_names)].copy()
80+
# Handle lazy tables (Dataset2D) vs eager tables (DataFrame)
81+
if isinstance(table.obs, pd.DataFrame):
82+
table.obs = pd.DataFrame(table.obs)
83+
else:
84+
# Lazy AnnData uses Dataset2D which needs to_memory() to convert properly
85+
table.obs = table.obs.to_memory()
8086
table.uns[TableModel.ATTRS_KEY][TableModel.REGION_KEY] = table.obs[region_key].unique().tolist()
8187
return table
8288

@@ -196,8 +202,14 @@ def _filter_table_by_elements(
196202
indices = ((table.obs[region_key] == name) & (table.obs[instance_key].isin(instances))).to_numpy()
197203
to_keep = to_keep | indices
198204
original_table = table
199-
table.obs = pd.DataFrame(table.obs)
205+
# Subset first, then materialize obs to avoid shape mismatch with lazy tables
200206
table = table[to_keep, :]
207+
# Handle lazy tables (Dataset2D) vs eager tables (DataFrame)
208+
if isinstance(table.obs, pd.DataFrame):
209+
table.obs = pd.DataFrame(table.obs)
210+
else:
211+
# Lazy AnnData uses Dataset2D which needs to_memory() to convert properly
212+
table.obs = table.obs.to_memory()
201213
if match_rows:
202214
assert instances is not None
203215
assert isinstance(instances, np.ndarray)
@@ -1066,7 +1078,12 @@ def get_values(
10661078
if origin == "obs":
10671079
df = obs[value_key_values].copy()
10681080
if origin == "var":
1069-
matched_table.obs = pd.DataFrame(obs)
1081+
# Handle lazy tables (Dataset2D) vs eager tables (DataFrame)
1082+
if isinstance(obs, pd.DataFrame):
1083+
matched_table.obs = pd.DataFrame(obs)
1084+
else:
1085+
# Lazy AnnData uses Dataset2D which needs to_memory() to convert properly
1086+
matched_table.obs = obs.to_memory()
10701087
if table_layer is None:
10711088
x = matched_table[:, value_key_values].X
10721089
else:

src/spatialdata/_utils.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,23 @@ def _inplace_fix_subset_categorical_obs(subset_adata: AnnData, original_adata: A
210210
"""
211211
if not hasattr(subset_adata, "obs") or not hasattr(original_adata, "obs"):
212212
return
213-
obs = pd.DataFrame(subset_adata.obs)
213+
# Handle lazy tables (Dataset2D) vs eager tables (DataFrame)
214+
if isinstance(subset_adata.obs, pd.DataFrame):
215+
obs = pd.DataFrame(subset_adata.obs)
216+
else:
217+
# Lazy AnnData uses Dataset2D which needs to_memory() to convert properly
218+
obs = subset_adata.obs.to_memory()
219+
220+
# Also handle lazy original_adata.obs
221+
if isinstance(original_adata.obs, pd.DataFrame):
222+
original_obs = original_adata.obs
223+
else:
224+
original_obs = original_adata.obs.to_memory()
225+
214226
for column in obs.columns:
215227
is_categorical = isinstance(obs[column].dtype, pd.CategoricalDtype)
216228
if is_categorical:
217-
c = obs[column].cat.set_categories(original_adata.obs[column].cat.categories)
229+
c = obs[column].cat.set_categories(original_obs[column].cat.categories)
218230
obs[column] = c
219231
subset_adata.obs = obs
220232

0 commit comments

Comments
 (0)