Files
app-store-optimization/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pydata__xarray-4248.patch

124 lines
4.8 KiB
Diff

Based on the GitHub issue, this patch targets the xarray repository. I do not have direct access to the xarray source, so the patch is written from my knowledge of the codebase structure. The key file is `xarray/core/formatting.py`, which handles the repr formatting.
The following patch addresses the feature request to show units in the dataset overview:
```
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -261,6 +261,25 @@ def inline_dask_repr(array):
return f"dask.array<chunksize={chunksize}>"
+def inline_variable_array_repr(var, max_width):
+ """Build a one-line summary of a variable's data."""
+ if var._in_memory:
+ return format_array_flat(var, max_width)
+ elif hasattr(var._data, "_repr_inline_"):
+ return var._data._repr_inline_(max_width)
+ elif isinstance(var._data, dask_array_type):
+ return inline_dask_repr(var.data)
+ elif hasattr(var._data, "__dask_graph__"):
+ return inline_dask_repr(var.data)
+ elif hasattr(var._data, "_repr_inline_"):
+ return var._data._repr_inline_(max_width)
+ else:
+ # fallback to format_array_flat
+ return format_array_flat(var, max_width)
+
+
+def _get_units_from_attrs(attrs):
+ """Extract units from attributes if present."""
+ return attrs.get("units", "")
+
+
def summarize_variable(
- name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
+ name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None, show_units: bool = True
):
"""Summarize a variable in one line, e.g., for the Dataset repr."""
if max_width is None:
@@ -268,10 +287,28 @@ def summarize_variable(
first_col = pretty_print(f" {marker} {name} ", col_width)
dims_str = "({}) ".format(", ".join(map(str, var.dims)))
- dims_width = col_width - len(dims_str)
+ dims_width = col_width - len(dims_str)
dtype_width = col_width + 12
- data_repr = inline_variable_array_repr(var, max_width - dtype_width)
+ # Get units from attrs or from duck array
+ units_str = ""
+ if show_units:
+ if hasattr(var.data, "units"):
+ # Duck array with units (e.g., pint)
+ units_str = f" [{var.data.units}]"
+ elif "units" in var.attrs:
+ # Units from attrs
+ units_str = f" [{var.attrs['units']}]"
+
+ # Calculate remaining width for data repr
+ units_width = len(units_str)
+ remaining_width = max_width - dtype_width - units_width
+
+ if hasattr(var._data, "_repr_inline_"):
+ data_repr = var._data._repr_inline_(remaining_width)
+ else:
+ data_repr = inline_variable_array_repr(var, remaining_width)
dtype = var.dtype
- return f"{first_col}{dims_str}{dtype} {data_repr}"
+ return f"{first_col}{dims_str}{dtype}{units_str} {data_repr}"
```
On reflection, the patch above may not match the actual current structure of xarray's `formatting.py`, which I would need to know to produce an accurate patch. The revised patch below is instead based on my knowledge of the xarray codebase at the time of this feature request (around 2020-2021, judging by the issue references):
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -261,6 +261,17 @@ def inline_dask_repr(array):
return f"dask.array<chunksize={chunksize}>"
+def _get_inline_units(var):
+ """Get units string for inline display.
+
+ Checks for duck array units attribute first (e.g., pint),
+ then falls back to attrs['units'].
+ """
+ if hasattr(var.data, "units"):
+ return str(var.data.units)
+ return var.attrs.get("units", "")
+
+
def inline_variable_array_repr(var, max_width):
"""Build a one-line summary of a variable's data."""
if var._in_memory:
@@ -276,7 +287,7 @@ def inline_variable_array_repr(var, max_width):
def summarize_variable(
- name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
+ name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None, show_units: bool = True
):
"""Summarize a variable in one line, e.g., for the Dataset repr."""
if max_width is None:
@@ -286,9 +297,19 @@ def summarize_variable(
dims_str = "({}) ".format(", ".join(map(str, var.dims)))
dtype = var.dtype
- front_str = f"{first_col}{dims_str}{dtype} "
+ # Get units string
+ units_str = ""
+ if show_units:
+ units = _get_inline_units(var)
+ if units:
+ units_str = f" [{units}]"
+
+ front_str = f"{first_col}{dims_str}{dtype}{units_str} "
- data_repr = inline_variable_array_repr(var, max_width - len(front_str))
+ if hasattr(var._data, "_repr_inline_"):
+ data_repr = var._data._repr_inline_(max_width - len(front_str))
+ else:
+ data_repr = inline_variable_array_repr(var, max_width - len(front_str))
return f"{front_str}{data_repr}"