From 5540f05fb47db58223eacc1870a633975770b2b9 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Tue, 2 Jun 2026 18:35:32 +0000
Subject: [PATCH 01/11] Refactor ingestion pipeline and dataset templates

* Introduce a new `dataset_factory` module for schema-driven dataset creation.
* Implement `run_segy_ingestion` orchestrator to streamline SEG-Y ingestion.
* Enhance dataset templates with dimension specification methods to ensure consistency in data types.
* Update `__init__.py` files to include new ingestion components and improve module organization.
* Remove deprecated `schema_resolver.py` to clean up the codebase.
* Add unit tests to validate new functionalities and ensure robustness.
---
 refactor_plan_bak.md                          | 220 +++++++++
 src/mdio/__init__.py                          |   4 +
 src/mdio/builder/dataset_builder.py           |   6 +-
 src/mdio/builder/templates/base.py            |  56 ++-
 src/mdio/builder/templates/seismic_2d_cdp.py  |   6 +-
 .../templates/seismic_2d_streamer_shot.py     |   2 +-
 src/mdio/builder/templates/seismic_3d_cdp.py  |   8 +-
 src/mdio/builder/templates/seismic_3d_coca.py |  20 +-
 src/mdio/builder/templates/seismic_3d_obn.py  |  21 +-
 .../templates/seismic_3d_offset_tiles.py      |  20 +-
 .../templates/seismic_3d_receiver_gathers.py  |  14 +-
 .../seismic_3d_shot_receiver_line.py          |  20 +-
 .../templates/seismic_3d_streamer_field.py    |  20 +-
 .../templates/seismic_3d_streamer_shot.py     |   2 +-
 src/mdio/converters/segy.py                   | 439 +-----------------
 src/mdio/ingestion/__init__.py                |  20 +-
 src/mdio/ingestion/dataset_factory.py         | 262 +++++++++++
 src/mdio/ingestion/metadata.py                |   2 +-
 src/mdio/ingestion/schema/__init__.py         |  20 +
 src/mdio/ingestion/schema/models.py           | 227 +++++++++
 src/mdio/ingestion/schema/resolver.py         |  88 ++++
 src/mdio/ingestion/schema_resolver.py         | 211 ---------
 src/mdio/ingestion/segy/coordinates.py        |  74 ++-
 src/mdio/ingestion/segy/file_headers.py       |   2 +-
 .../ingestion/{ => segy}/index_strategies.py  |  33 +-
 src/mdio/ingestion/segy/pipeline.py           | 194 ++++++++
 src/mdio/ingestion/segy/raw_headers.py        |  64 +++
 src/mdio/ingestion/segy/reader.py             |  83 ++++
 src/mdio/ingestion/segy/serializer.py         | 101 ++++
 src/mdio/ingestion/segy/validation.py         |   2 +-
 src/mdio/segy/geometry.py                     | 114 +----
 src/mdio/segy/utilities.py                    | 171 -------
 tests/unit/ingestion/test_dataset_factory.py  | 128 +++++
 tests/unit/ingestion/test_metadata.py         |  12 +-
 .../ingestion/test_no_template_mutation.py    |  36 ++
 tests/unit/ingestion/test_schema_effects.py   | 112 +++++
 .../test_schema_resolver.py}                  |  49 +-
 tests/unit/ingestion/test_segy_coordinates.py |  82 +---
 .../unit/ingestion/test_segy_file_headers.py  |  14 +-
 .../test_segy_grid_overrides.py               |  72 ++-
 .../test_segy_index_strategies.py}            |  51 +-
 tests/unit/ingestion/test_segy_pipeline.py    |  90 ++++
 tests/unit/ingestion/test_segy_reader.py      | 136 ++++++
 .../test_segy_spec_validation.py              |  10 +-
 tests/unit/ingestion/test_segy_validation.py  |  14 +-
 tests/unit/test_segy_file_header_modes.py     |  20 +-
 46 files changed, 2213 insertions(+), 1139 deletions(-)
 create mode 100644 refactor_plan_bak.md
 create mode 100644 src/mdio/ingestion/dataset_factory.py
 create mode 100644 src/mdio/ingestion/schema/__init__.py
 create mode 100644 src/mdio/ingestion/schema/models.py
 create mode 100644 src/mdio/ingestion/schema/resolver.py
 delete mode 100644 src/mdio/ingestion/schema_resolver.py
 rename src/mdio/ingestion/{ => segy}/index_strategies.py (91%)
 create mode 100644 src/mdio/ingestion/segy/pipeline.py
 create mode 100644 src/mdio/ingestion/segy/raw_headers.py
 create mode 100644 src/mdio/ingestion/segy/reader.py
 create mode 100644 src/mdio/ingestion/segy/serializer.py
 create mode 100644 tests/unit/ingestion/test_dataset_factory.py
 create mode 100644 tests/unit/ingestion/test_no_template_mutation.py
 create mode 100644 tests/unit/ingestion/test_schema_effects.py
 rename tests/unit/{test_ingestion_schema_resolver.py => ingestion/test_schema_resolver.py} (73%)
 rename tests/unit/{ => ingestion}/test_segy_grid_overrides.py (74%)
 rename tests/unit/{test_ingestion_index_strategies.py => ingestion/test_segy_index_strategies.py} (93%)
 create mode 100644 tests/unit/ingestion/test_segy_pipeline.py
 create mode 100644 tests/unit/ingestion/test_segy_reader.py
 rename tests/unit/{ => ingestion}/test_segy_spec_validation.py (91%)

diff --git a/refactor_plan_bak.md b/refactor_plan_bak.md
new file mode 100644
index 00000000..ce33bde9
--- /dev/null
+++ b/refactor_plan_bak.md
@@ -0,0 +1,220 @@
+---
+name: split-grid-overrides-v3
+overview: Split the current `grid_overrides_v3` branch into 8 dependency-ordered, atomically reviewable PRs against `main`, each with its own changelog story. Each PR is green on its own; PR 6 is the one that flips runtime over to the new orchestrator and is the natural minor-version-bump cut.
+todos:
+  - id: pr1
+    content: "PR 1: extract pure helpers (header_analysis, coordinate_utils, validation, metadata) into mdio/ingestion/ with no behavior change"
+    status: pending
+  - id: pr2
+    content: "PR 2: add Pydantic GridOverrides class + dict deprecation shim, additive only"
+    status: pending
+  - id: pr3
+    content: "PR 3: replace GridOverrider internals with IndexStrategy pattern, fix the broken test_segy_grid_overrides.py file"
+    status: pending
+  - id: pr4
+    content: "PR 4: introduce SchemaResolver + ResolvedSchema and template declare_coordinate_specs/apply_resolved_dimensions hooks (dead code, wired in PR 6)"
+    status: pending
+  - id: pr5
+    content: "PR 5: add HeaderAnalyzer and DatasetFactory services with unit tests (dead code, wired in PR 6)"
+    status: pending
+  - id: pr6
+    content: "PR 6: introduce run_segy_ingestion orchestrator, slim segy_to_mdio to a shim, expose new public API, delete legacy GridOverrider, decide CLI fate"
+    status: pending
+  - id: pr7
+    content: "PR 7: extract MDIO__IMPORT__RAW_HEADERS gating into _raw_headers_experimental.py for one-file removal"
+    status: pending
+  - id: pr8
+    content: "PR 8: dependency bump (uv.lock) and template internal polish"
+    status: pending
+isProject: false
+---
+
+# Split `grid_overrides_v3` Into 8 Atomic PRs
+
+## Strategy
+
+The current branch interleaves five themes: typed config, strategy refactor, schema resolution, services, pipeline rewire, and an experimental flag move. Treat the existing branch as the **source of truth for the final state** and replay it as a stack of branches off `main`. Each PR below cherry-picks a tight subset of files; nothing earlier than PR 6 changes the runtime path of `segy_to_mdio`, so each one merges without a behavior change visible to users.
+
+Suggested workflow per PR:
+
+1. `git checkout -b <pr-name> origin/main`
+2. `git checkout grid_overrides_v3 -- <files for this PR>`
+3. Trim back anything that pulls in scope from later PRs (notes per PR below).
+4. Run `nox -s pre-commit` and `nox -s tests-3.13 -- <relevant tests>` per the workspace rule.
+5. Open the PR with the suggested title/summary.
+
+```mermaid
+flowchart LR
+    PR1[PR1 extract pure modules] --> PR2[PR2 Pydantic GridOverrides]
+    PR1 --> PR3[PR3 strategy pattern]
+    PR2 --> PR3
+    PR3 --> PR4[PR4 schema resolver and template hooks]
+    PR4 --> PR5[PR5 HeaderAnalyzer and DatasetFactory]
+    PR5 --> PR6[PR6 pipeline orchestrator and public API]
+    PR6 --> PR7[PR7 raw-headers gating]
+    PR6 --> PR8[PR8 dep bump and template polish]
+```
+
+---
+
+## PR 1 - Extract pure modules into `mdio/ingestion/`
+
+**Goal:** mechanical code move, zero behavior change. Gives the next PRs a home without dragging architecture along.
+
+**Files (copy from branch verbatim):**
+- [src/mdio/ingestion/__init__.py](src/mdio/ingestion/__init__.py) - trimmed to only re-export what this PR introduces
+- [src/mdio/ingestion/header_analysis.py](src/mdio/ingestion/header_analysis.py) (the pure analysis primitives + `StreamerShotGeometryType`, `ShotGunGeometryType`, `analyze_streamer_headers`, `analyze_lines_for_guns`, `analyze_non_indexed_headers`)
+- [src/mdio/ingestion/coordinate_utils.py](src/mdio/ingestion/coordinate_utils.py)
+- [src/mdio/ingestion/validation.py](src/mdio/ingestion/validation.py) (`grid_density_qc`, `validate_spec_in_template`)
+- [src/mdio/ingestion/metadata.py](src/mdio/ingestion/metadata.py) MINUS the `attach_raw_binary_header` import (defer to PR 7)
+
+**Adjustments before commit:**
+- In [src/mdio/ingestion/metadata.py](src/mdio/ingestion/metadata.py), drop `from mdio.ingestion._raw_headers_experimental import attach_raw_binary_header` and its call inside `add_segy_file_headers`. Inline the existing on-`main` raw-binary handling instead, or stub it as a TODO (PR 7 reintroduces the gating module).
+- Trim `__init__.py` exports to only the symbols actually defined in this PR.
+- Update import sites in `mdio/converters/segy.py` and any other on-`main` callers to point at the new module locations.
+
+**Tests:** existing tests must pass unchanged (`nox -s tests-3.13`).
+
+**PR title:** `refactor(ingestion): extract analysis, coordinate, validation, and metadata helpers`
+
+---
+
+## PR 2 - Add Pydantic `GridOverrides` (additive)
+
+**Goal:** the typed model lands as a public class, dict path stays alive internally.
+
+**Files:**
+- [src/mdio/segy/geometry.py](src/mdio/segy/geometry.py) - add the `GridOverrides` class **alongside** the existing `GridOverrider` (do not delete `GridOverrider` yet).
+- [src/mdio/converters/segy.py](src/mdio/converters/segy.py) - add the `isinstance(grid_overrides, dict)` shim with `DeprecationWarning` and `GridOverrides.from_legacy_dict(...)` coercion. After coercion, immediately re-dump to dict via `model_dump(by_alias=True, exclude_defaults=True)` so the still-on-`main` `GridOverrider` keeps receiving the dict it expects.
+- [src/mdio/__init__.py](src/mdio/__init__.py) - add `GridOverrides` to `__all__` and imports.
+- [tests/unit/test_grid_overrides_pydantic.py](tests/unit/test_grid_overrides_pydantic.py) - new file, copy as-is.
+
+**Acceptance:** existing `tests/unit/test_segy_grid_overrides.py` (which imports `GridOverrider`) still passes; `test_grid_overrides_pydantic.py` passes.
+
+**PR title:** `feat(api): typed Pydantic GridOverrides with legacy dict deprecation`
+
+**Changelog line:** "`mdio.GridOverrides` is now the supported way to configure grid overrides. Passing a `dict` still works but emits a `DeprecationWarning`."
+
+---
+
+## PR 3 - Replace `GridOverrider` with strategy pattern
+
+**Goal:** the "modernization to grid override code" PR you called out. Behavior preserved; the monolithic `GridOverrider.run(...)` becomes `IndexStrategyRegistry().create_strategy(...)` returning composable `IndexStrategy` instances.
+
+**Files:**
+- [src/mdio/ingestion/index_strategies.py](src/mdio/ingestion/index_strategies.py) - new (`IndexStrategy` ABC + `RegularGridStrategy`, `NonBinnedStrategy`, `DuplicateHandlingStrategy`, `ChannelWrappingStrategy`, `ShotWrappingStrategy`, `ComponentSynthesisStrategy`, `CompositeStrategy`, `IndexStrategyRegistry`).
+- [src/mdio/segy/geometry.py](src/mdio/segy/geometry.py) - rewrite `GridOverrider.run(...)` as a thin shim: build a `GridOverrides` from the input dict, call `IndexStrategyRegistry`, return the `(headers, names, chunks)` tuple shape that callers expect. Keep the public signature unchanged.
+- [tests/unit/test_ingestion_index_strategies.py](tests/unit/test_ingestion_index_strategies.py) - new.
+- [tests/unit/test_segy_grid_overrides.py](tests/unit/test_segy_grid_overrides.py) - **fix**: the file currently imports `GridOverrider` and `TemplateRegistry`; either keep the shim alive (above) so this file passes unchanged, or rewrite each test to drive `IndexStrategyRegistry` directly. The branch leaves it in a broken-import state, which is the kind of footgun this PR should clean up.
+
+**Acceptance:** integration tests `tests/integration/test_import_obn_grid_overrides.py` and `tests/integration/test_import_streamer_grid_overrides.py` pass; new strategy unit tests pass; old `test_segy_grid_overrides.py` either passes or is replaced.
+
+**PR title:** `refactor(ingestion): split GridOverrider into composable IndexStrategy classes`
+
+---
+
+## PR 4 - Schema resolution + template-aware dimension layout
+
+**Goal:** declarative final-schema description; templates stop having ingestion poke private attributes.
+
+**Files:**
+- [src/mdio/ingestion/schema_resolver.py](src/mdio/ingestion/schema_resolver.py) - new (`DimensionSpec`, `CoordinateSpec`, `ResolvedSchema`, `SchemaResolver`).
+- [src/mdio/builder/templates/base.py](src/mdio/builder/templates/base.py) - add `declare_coordinate_specs()` (default impl over `physical_coordinate_names` + `logical_coordinate_names`) and `apply_resolved_dimensions(dim_names, chunk_shape)`. Add the `synthesize_missing_dims` and `_calculated_dims` attributes.
+- Per-template overrides only where the default isn't right:
+  - [src/mdio/builder/templates/seismic_3d_obn.py](src/mdio/builder/templates/seismic_3d_obn.py) - per-shot-dim coord specs, `_calculated_dims = ("shot_index",)`, `synthesize_missing_dims = ("component",)`.
+  - [src/mdio/builder/templates/seismic_3d_streamer_field.py](src/mdio/builder/templates/seismic_3d_streamer_field.py)
+  - [src/mdio/builder/templates/seismic_3d_coca.py](src/mdio/builder/templates/seismic_3d_coca.py)
+  - [src/mdio/builder/templates/seismic_3d_cdp.py](src/mdio/builder/templates/seismic_3d_cdp.py), [src/mdio/builder/templates/seismic_2d_cdp.py](src/mdio/builder/templates/seismic_2d_cdp.py) - inline/crossline-indexed `cdp_x`/`cdp_y` specs.
+- [tests/unit/test_ingestion_schema_resolver.py](tests/unit/test_ingestion_schema_resolver.py) - new.
+
+**Adjustments:** nothing in this PR consumes `ResolvedSchema` at runtime - the resolver is dead code until PR 5/6 wire it. That is intentional.
+
+**Acceptance:** all template tests under `tests/unit/v1/templates/` still pass; new resolver tests pass.
+
+**PR title:** `feat(ingestion): declarative ResolvedSchema and template-driven coordinate specs`
+
+---
+
+## PR 5 - `HeaderAnalyzer` + `DatasetFactory`
+
+**Goal:** add the two small services that PR 6 will compose. They have a real changelog bullet on their own ("ingestion now reads only the headers required by the resolved schema").
+
+**Files:**
+- [src/mdio/ingestion/header_analyzer.py](src/mdio/ingestion/header_analyzer.py)
+- [src/mdio/ingestion/dataset_factory.py](src/mdio/ingestion/dataset_factory.py)
+- [tests/unit/test_ingestion_header_analyzer.py](tests/unit/test_ingestion_header_analyzer.py)
+- [tests/unit/test_ingestion_dataset_factory.py](tests/unit/test_ingestion_dataset_factory.py)
+
+**Adjustments:** still no runtime use; pipeline switch is PR 6.
+
+**Acceptance:** new unit tests pass; everything else unchanged.
+
+**PR title:** `feat(ingestion): HeaderAnalyzer and DatasetFactory services`
+
+---
+
+## PR 6 - Pipeline orchestrator + public API surface (the minor-version-bump PR)
+
+**Goal:** wire PRs 2-5 into a single `run_segy_ingestion`, replace the body of `segy_to_mdio` with a thin shim, surface the new public types.
+
+**Files:**
+- [src/mdio/ingestion/pipeline.py](src/mdio/ingestion/pipeline.py) - new (`run_segy_ingestion`).
+- [src/mdio/converters/segy.py](src/mdio/converters/segy.py) - body becomes the dict-deprecation shim + delegate to `run_segy_ingestion`. The `noqa: PLR0913` and old-style internals go away.
+- [src/mdio/ingestion/__init__.py](src/mdio/ingestion/__init__.py) - expand to the full `__all__` from the branch.
+- [src/mdio/__init__.py](src/mdio/__init__.py) - add `run_segy_ingestion`, `IndexStrategy`, `IndexStrategyRegistry`, `ResolvedSchema`, `CoordinateSpec`, `DimensionSpec`.
+- [src/mdio/segy/geometry.py](src/mdio/segy/geometry.py) - **delete** the legacy `GridOverrider` shim that PR 3 kept alive. All callers now route through `run_segy_ingestion`.
+- [src/mdio/commands/segy.py](src/mdio/commands/segy.py) - **fix the broken CLI**: it currently calls `segy_to_mdio(segy_path=..., index_bytes=..., index_names=..., chunksize=...)`, none of which match the v1 signature in `converters/segy.py`. Either update the CLI to construct a `SegySpec` + `mdio_template` + `GridOverrides` and call `segy_to_mdio` correctly, or wrap it in a `# TODO(v1.3): rewrite CLI for v1 API` and skip it from CI. Pick one explicitly; do not leave it silently broken.
+
+**Acceptance:** full `nox -s tests-3.13` passes; integration tests under `tests/integration/` pass; example notebooks (if any) still run.
+
+**PR title:** `feat(ingestion): run_segy_ingestion orchestrator and v1.2 public API`
+
+**Changelog highlights:**
+- New public functions/classes: `run_segy_ingestion`, `IndexStrategy`, `IndexStrategyRegistry`, `ResolvedSchema`, `CoordinateSpec`, `DimensionSpec`.
+- `segy_to_mdio` retained as a v1.x compatibility entry point.
+- Memory: ingestion now only parses headers required by the resolved schema (`HeaderAnalyzer`).
+
+---
+
+## PR 7 - Gate experimental raw-headers behind `_raw_headers_experimental.py`
+
+**Goal:** isolate the `MDIO__IMPORT__RAW_HEADERS` feature so removing it later is a one-file delete.
+
+**Files:**
+- [src/mdio/ingestion/_raw_headers_experimental.py](src/mdio/ingestion/_raw_headers_experimental.py) - new. The module's own docstring is the PR description; copy it into the PR body.
+- [src/mdio/ingestion/pipeline.py](src/mdio/ingestion/pipeline.py) - call `maybe_add_raw_headers(mdio_template, mdio_ds)` after `DatasetFactory().build(...)`.
+- [src/mdio/ingestion/metadata.py](src/mdio/ingestion/metadata.py) - re-add the `attach_raw_binary_header` import + call inside `add_segy_file_headers` (the inverse of the trim in PR 1).
+- [src/mdio/builder/templates/base.py](src/mdio/builder/templates/base.py) - keep the `_add_raw_headers` method (defined in PR 4) in place; no change here unless the prior PR omitted it.
+
+**Acceptance:** with `MDIO__IMPORT__RAW_HEADERS=1` and Zarr v3, raw_headers variable appears in output; without the env var, no behavior change.
+
+**PR title:** `refactor: gate experimental raw-headers feature behind a single module`
+
+---
+
+## PR 8 - Dep bump + template polish
+
+**Goal:** the leftover small stuff. Optional; can ride along with PR 6 if reviewers prefer.
+
+**Files:**
+- [uv.lock](uv.lock)
+- Per-template polish in [src/mdio/builder/templates/](src/mdio/builder/templates/) (`_dim_names`/`_calculated_dims` cleanup, dead-method removal).
+
+**Acceptance:** full test suite green; `nox -s pre-commit` clean.
+
+**PR title:** `chore(deps): bump dependencies and polish template internals`
+
+---
+
+## Risks & coupling notes
+
+- **PR 4-6 are the tightest cluster.** PR 4's `ResolvedSchema` is dead code at merge time, PR 5's services are dead code at merge time, PR 6 flips runtime. This staging is intentional - it keeps each diff small and reviewable, at the cost of two PRs that "do nothing yet". If reviewers push back on dead code, fold PRs 4 and 5 together but **never** fold them into PR 6.
+- **PR 3 must keep `GridOverrider` callable** until PR 6 deletes it, otherwise existing callers in `mdio/converters/segy.py` (still on the v1.1 path) break mid-stack.
+- **`tests/unit/test_segy_grid_overrides.py`** is currently broken on the branch (imports a `GridOverrider` symbol that no longer exists in `mdio.segy.geometry`). PR 3 is the place to fix it. Do not let it land broken in a separate PR.
+- **CLI in `mdio/commands/segy.py`** has the same problem (calls v0-style `segy_to_mdio(segy_path=..., index_bytes=..., ...)`). PR 6 must either fix or explicitly defer with a tracked issue - this would be very embarrassing in a release notes diff.
+- **Autodocs.** The split gives ~8 changelog bullets instead of one "big refactor" line. PR 2 (typed config), PR 3 (strategy pattern), PR 5 (memory win), and PR 6 (new public API) are all release-note-worthy on their own.
+
+## Suggested merge order for the next two minor releases
+
+- v1.2: PRs 1, 2, 7 (no runtime change, deprecation only, easy to revert).
+- v1.3: PRs 3, 4, 5, 6, 8 (the actual architectural turn, with the orchestrator landing in PR 6 as the headline).
diff --git a/src/mdio/__init__.py b/src/mdio/__init__.py
index 7e1851c0..bfae0f30 100644
--- a/src/mdio/__init__.py
+++ b/src/mdio/__init__.py
@@ -8,6 +8,8 @@
 from mdio.api.io import to_mdio
 from mdio.converters import mdio_to_segy
 from mdio.converters import segy_to_mdio
+from mdio.ingestion import ResolvedSchema
+from mdio.ingestion import run_segy_ingestion
 from mdio.optimize.access_pattern import OptimizedAccessPatternConfig
 from mdio.optimize.access_pattern import optimize_access_patterns
 from mdio.segy.geometry import GridOverrides
@@ -27,4 +29,6 @@
     "segy_to_mdio",
     "OptimizedAccessPatternConfig",
     "optimize_access_patterns",
+    "run_segy_ingestion",
+    "ResolvedSchema",
 ]
diff --git a/src/mdio/builder/dataset_builder.py b/src/mdio/builder/dataset_builder.py
index b2560e1d..81ab5b65 100644
--- a/src/mdio/builder/dataset_builder.py
+++ b/src/mdio/builder/dataset_builder.py
@@ -4,9 +4,13 @@
 from datetime import datetime
 from enum import Enum
 from enum import auto
+from importlib import metadata
 from typing import Any
 
-from mdio import __version__
+try:
+    __version__ = metadata.version("multidimio")
+except metadata.PackageNotFoundError:
+    __version__ = "unknown"
 from mdio.builder.formatting_html import dataset_builder_repr_html
 from mdio.builder.schemas.compressors import ZFP
 from mdio.builder.schemas.compressors import Blosc
diff --git a/src/mdio/builder/templates/base.py b/src/mdio/builder/templates/base.py
index 644ba91c..928c5926 100644
--- a/src/mdio/builder/templates/base.py
+++ b/src/mdio/builder/templates/base.py
@@ -114,6 +114,46 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
         )
         return tuple(specs)
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template.
+
+        This is the single source of truth for dimension-coordinate data types: both the
+        ingestion ``SchemaResolver`` and :meth:`_add_dimension_coordinate` read from it, so
+        the resolved schema and the built dataset cannot disagree.
+
+        Returns:
+            A dictionary mapping dimension name to ScalarType.
+        """
+        return dict.fromkeys(self.dimension_names, ScalarType.INT32)
+
+    def _dim_dtype(self, name: str) -> ScalarType:
+        """Return the declared dtype for a dimension coordinate.
+
+        Sourcing dimension-coordinate dtypes here (and in the ingestion ``SchemaResolver``)
+        from :meth:`declare_dimension_specs` keeps the built dataset and the resolved schema
+        from disagreeing without a separate post-build assertion.
+
+        Args:
+            name: The dimension name.
+
+        Returns:
+            The declared :class:`ScalarType`, defaulting to ``INT32`` when undeclared.
+        """
+        return self.declare_dimension_specs().get(name, ScalarType.INT32)
+
+    def _add_dimension_coordinate(self, name: str) -> None:
+        """Add a single dimension coordinate, sourcing its dtype from :meth:`declare_dimension_specs`.
+
+        Args:
+            name: The dimension name; also the coordinate name and its sole dimension.
+        """
+        self._builder.add_coordinate(
+            name,
+            dimensions=(name,),
+            data_type=self._dim_dtype(name),
+            metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(name)),
+        )
+
     def build_dataset(
         self,
         name: str,
@@ -354,6 +394,15 @@ def _load_dataset_attributes(self) -> dict[str, Any]:
             The dataset attributes as a dictionary
         """
 
+    @property
+    def units(self) -> dict[str, AllUnitModel]:
+        """Return a copy of the template's configured units.
+
+        Read-only view for collaborators (e.g. ingestion unit resolution) so they do not
+        reach into the private ``_units`` mapping.
+        """
+        return dict(self._units)
+
     def get_unit_by_key(self, key: str) -> AllUnitModel | None:
         """Get units by variable/dimension/coordinate name. Returns None if not found."""
         return self._units.get(key, None)
@@ -375,12 +424,7 @@ def _add_coordinates(self) -> None:
         """
         # Add dimension coordinates
         for name in self._dim_names:
-            self._builder.add_coordinate(
-                name,
-                dimensions=(name,),
-                data_type=ScalarType.INT32,
-                metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(name)),
-            )
+            self._add_dimension_coordinate(name)
 
         # Add non-dimension coordinates
         # Note: coordinate_names may be modified at runtime by grid overrides,
diff --git a/src/mdio/builder/templates/seismic_2d_cdp.py b/src/mdio/builder/templates/seismic_2d_cdp.py
index fd2ca808..91185d09 100644
--- a/src/mdio/builder/templates/seismic_2d_cdp.py
+++ b/src/mdio/builder/templates/seismic_2d_cdp.py
@@ -48,18 +48,18 @@ def _add_coordinates(self) -> None:
         self._builder.add_coordinate(
             "cdp",
             dimensions=("cdp",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("cdp"),
         )
         self._builder.add_coordinate(
             self._gather_domain,
             dimensions=(self._gather_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self._gather_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._gather_domain)),
         )
         self._builder.add_coordinate(
             self.trace_domain,
             dimensions=(self.trace_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self.trace_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_2d_streamer_shot.py b/src/mdio/builder/templates/seismic_2d_streamer_shot.py
index 4999fba8..7be80d03 100644
--- a/src/mdio/builder/templates/seismic_2d_streamer_shot.py
+++ b/src/mdio/builder/templates/seismic_2d_streamer_shot.py
@@ -44,7 +44,7 @@ def _add_coordinates(self) -> None:
             self._builder.add_coordinate(
                 name,
                 dimensions=(name,),
-                data_type=ScalarType.INT32,
+                data_type=self._dim_dtype(name),
                 metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(name)),
             )
 
diff --git a/src/mdio/builder/templates/seismic_3d_cdp.py b/src/mdio/builder/templates/seismic_3d_cdp.py
index 6490a7de..6cbb6bc6 100644
--- a/src/mdio/builder/templates/seismic_3d_cdp.py
+++ b/src/mdio/builder/templates/seismic_3d_cdp.py
@@ -48,23 +48,23 @@ def _add_coordinates(self) -> None:
         self._builder.add_coordinate(
             "inline",
             dimensions=("inline",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("inline"),
         )
         self._builder.add_coordinate(
             "crossline",
             dimensions=("crossline",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("crossline"),
         )
         self._builder.add_coordinate(
             self._gather_domain,
             dimensions=(self._gather_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self._gather_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._gather_domain)),
         )
         self._builder.add_coordinate(
             self.trace_domain,
             dimensions=(self.trace_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self.trace_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_3d_coca.py b/src/mdio/builder/templates/seismic_3d_coca.py
index fc296a33..d83d4fd6 100644
--- a/src/mdio/builder/templates/seismic_3d_coca.py
+++ b/src/mdio/builder/templates/seismic_3d_coca.py
@@ -34,34 +34,44 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="cdp_y", dimensions=("inline", "crossline"), dtype=ScalarType.FLOAT64),
         )
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template."""
+        return {
+            "inline": ScalarType.INT32,
+            "crossline": ScalarType.INT32,
+            "offset": ScalarType.INT32,
+            "azimuth": ScalarType.FLOAT32,
+            self._data_domain: ScalarType.INT32,
+        }
+
     def _add_coordinates(self) -> None:
         # Add dimension coordinates
         self._builder.add_coordinate(
             "inline",
             dimensions=("inline",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("inline"),
         )
         self._builder.add_coordinate(
             "crossline",
             dimensions=("crossline",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("crossline"),
         )
         self._builder.add_coordinate(
             "offset",
             dimensions=("offset",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("offset"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("offset")),  # same unit as X/Y
         )
         self._builder.add_coordinate(
             "azimuth",
             dimensions=("azimuth",),
-            data_type=ScalarType.FLOAT32,
+            data_type=self._dim_dtype("azimuth"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("azimuth")),
         )
         self._builder.add_coordinate(
             self.trace_domain,
             dimensions=(self.trace_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self.trace_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_3d_obn.py b/src/mdio/builder/templates/seismic_3d_obn.py
index e5b81659..503f6022 100644
--- a/src/mdio/builder/templates/seismic_3d_obn.py
+++ b/src/mdio/builder/templates/seismic_3d_obn.py
@@ -33,6 +33,7 @@ def __init__(self, data_domain: SeismicDataDomain = "time"):
 
         self._spatial_dim_names = ("component", "receiver", "shot_line", "gun", "shot_index")
         self._calculated_dims = ("shot_index",)
+        self.synthesize_missing_dims = ("component",)
         self._dim_names = (*self._spatial_dim_names, self._data_domain)
         self._physical_coord_names = (
             "group_coord_x",
@@ -63,33 +64,43 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="source_coord_y", dimensions=shot_dims, dtype=ScalarType.FLOAT64),
         )
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template."""
+        return {
+            "component": ScalarType.UINT8,
+            "receiver": ScalarType.UINT32,
+            "shot_line": ScalarType.UINT32,
+            "gun": ScalarType.UINT8,
+            self._data_domain: ScalarType.INT32,
+        }
+
     def _add_coordinates(self) -> None:
         # Add dimension coordinates
         # EXCLUDE: `shot_index` since it's 0-N (calculated dimension)
         self._builder.add_coordinate(
             "component",
             dimensions=("component",),
-            data_type=ScalarType.UINT8,
+            data_type=self._dim_dtype("component"),
         )
         self._builder.add_coordinate(
             "receiver",
             dimensions=("receiver",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("receiver"),
         )
         self._builder.add_coordinate(
             "shot_line",
             dimensions=("shot_line",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("shot_line"),
         )
         self._builder.add_coordinate(
             "gun",
             dimensions=("gun",),
-            data_type=ScalarType.UINT8,
+            data_type=self._dim_dtype("gun"),
         )
         self._builder.add_coordinate(
             self._data_domain,
             dimensions=(self._data_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self._data_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._data_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_3d_offset_tiles.py b/src/mdio/builder/templates/seismic_3d_offset_tiles.py
index 3ec802c7..5eb5df54 100644
--- a/src/mdio/builder/templates/seismic_3d_offset_tiles.py
+++ b/src/mdio/builder/templates/seismic_3d_offset_tiles.py
@@ -41,36 +41,46 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="cdp_y", dimensions=("inline", "crossline"), dtype=ScalarType.FLOAT64),
         )
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template."""
+        return {
+            "inline": ScalarType.INT32,
+            "crossline": ScalarType.INT32,
+            "inline_offset_tile": ScalarType.INT16,
+            "crossline_offset_tile": ScalarType.INT16,
+            self._data_domain: ScalarType.INT32,
+        }
+
     def _add_coordinates(self) -> None:
         # Add dimension coordinates
         self._builder.add_coordinate(
             "inline",
             dimensions=("inline",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("inline"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("inline")),
         )
         self._builder.add_coordinate(
             "crossline",
             dimensions=("crossline",),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype("crossline"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("crossline")),
         )
         self._builder.add_coordinate(
             "inline_offset_tile",
             dimensions=("inline_offset_tile",),
-            data_type=ScalarType.INT16,
+            data_type=self._dim_dtype("inline_offset_tile"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("inline_offset_tile")),
         )
         self._builder.add_coordinate(
             "crossline_offset_tile",
             dimensions=("crossline_offset_tile",),
-            data_type=ScalarType.INT16,
+            data_type=self._dim_dtype("crossline_offset_tile"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("crossline_offset_tile")),
         )
         self._builder.add_coordinate(
             self.trace_domain,
             dimensions=(self.trace_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self.trace_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py
index 232f7a22..911ecf87 100644
--- a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py
+++ b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py
@@ -45,25 +45,33 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="source_coord_y", dimensions=shot_dims, dtype=ScalarType.FLOAT64),
         )
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template."""
+        return {
+            "receiver": ScalarType.UINT32,
+            "shot_line": ScalarType.UINT32,
+            self._data_domain: ScalarType.INT32,
+        }
+
     def _add_coordinates(self) -> None:
         # Add dimension coordinates
         # Note: shot_index is calculated (0-N), so we don't add a coordinate for it
         self._builder.add_coordinate(
             "receiver",
             dimensions=("receiver",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("receiver"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("receiver")),
         )
         self._builder.add_coordinate(
             "shot_line",
             dimensions=("shot_line",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("shot_line"),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("shot_line")),
         )
         self._builder.add_coordinate(
             self.trace_domain,
             dimensions=(self.trace_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self.trace_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py b/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
index e5e2dbad..311ca046 100644
--- a/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
+++ b/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
@@ -45,32 +45,42 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="orig_field_record_num", dimensions=source_dims, dtype=ScalarType.UINT32),
         )
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template."""
+        return {
+            "shot_line": ScalarType.UINT32,
+            "shot_point": ScalarType.UINT32,
+            "receiver_line": ScalarType.UINT32,
+            "receiver": ScalarType.UINT32,
+            self._data_domain: ScalarType.INT32,
+        }
+
     def _add_coordinates(self) -> None:
         # Add dimension coordinates
         self._builder.add_coordinate(
             "shot_line",
             dimensions=("shot_line",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("shot_line"),
         )
         self._builder.add_coordinate(
             "shot_point",
             dimensions=("shot_point",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("shot_point"),
         )
         self._builder.add_coordinate(
             "receiver_line",
             dimensions=("receiver_line",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("receiver_line"),
         )
         self._builder.add_coordinate(
             "receiver",
             dimensions=("receiver",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("receiver"),
         )
         self._builder.add_coordinate(
             self._data_domain,
             dimensions=(self._data_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self._data_domain),
             metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._data_domain)),
         )
 
diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py
index c2613e6a..d8e23368 100644
--- a/src/mdio/builder/templates/seismic_3d_streamer_field.py
+++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py
@@ -52,33 +52,43 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="group_coord_y", dimensions=receiver_dims, dtype=ScalarType.FLOAT64),
         )
 
+    def declare_dimension_specs(self) -> dict[str, ScalarType]:
+        """Declare the data types for each dimension in this template."""
+        return {
+            "sail_line": ScalarType.UINT32,
+            "gun": ScalarType.UINT8,
+            "cable": ScalarType.UINT8,
+            "channel": ScalarType.UINT16,
+            self._data_domain: ScalarType.INT32,
+        }
+
     def _add_coordinates(self) -> None:
         # Add dimension coordinates
         # EXCLUDE: `shot_index` since its 0-N
         self._builder.add_coordinate(
             "sail_line",
             dimensions=("sail_line",),
-            data_type=ScalarType.UINT32,
+            data_type=self._dim_dtype("sail_line"),
         )
         self._builder.add_coordinate(
             "gun",
             dimensions=("gun",),
-            data_type=ScalarType.UINT8,
+            data_type=self._dim_dtype("gun"),
         )
         self._builder.add_coordinate(
             "cable",
             dimensions=("cable",),
-            data_type=ScalarType.UINT8,
+            data_type=self._dim_dtype("cable"),
         )
         self._builder.add_coordinate(
             "channel",
             dimensions=("channel",),
-            data_type=ScalarType.UINT16,
+            data_type=self._dim_dtype("channel"),
         )
         self._builder.add_coordinate(
             self._data_domain,
             dimensions=(self._data_domain,),
-            data_type=ScalarType.INT32,
+            data_type=self._dim_dtype(self._data_domain),
         )
 
         # Add non-dimension coordinates
diff --git a/src/mdio/builder/templates/seismic_3d_streamer_shot.py b/src/mdio/builder/templates/seismic_3d_streamer_shot.py
index 4709dba3..744c5fe6 100644
--- a/src/mdio/builder/templates/seismic_3d_streamer_shot.py
+++ b/src/mdio/builder/templates/seismic_3d_streamer_shot.py
@@ -46,7 +46,7 @@ def _add_coordinates(self) -> None:
             self._builder.add_coordinate(
                 name,
                 dimensions=(name,),
-                data_type=ScalarType.INT32,
+                data_type=self._dim_dtype(name),
                 metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(name)),
             )
 
diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py
index 1dd9d20a..8692b40e 100644
--- a/src/mdio/converters/segy.py
+++ b/src/mdio/converters/segy.py
@@ -5,347 +5,20 @@
 import logging
 from typing import TYPE_CHECKING
 
-import numpy as np
-import zarr
-from segy.config import SegyFileSettings
-from segy.config import SegyHeaderOverrides
-
-from mdio.api.io import _normalize_path
-from mdio.api.io import to_mdio
-from mdio.builder.schemas.chunk_grid import RegularChunkGrid
-from mdio.builder.schemas.chunk_grid import RegularChunkShape
-from mdio.builder.schemas.compressors import Blosc
-from mdio.builder.schemas.compressors import BloscCname
-from mdio.builder.schemas.dtype import ScalarType
-from mdio.builder.schemas.v1.variable import VariableMetadata
-from mdio.builder.xarray_builder import to_xarray_dataset
-from mdio.constants import ZarrFormat
-from mdio.converters.exceptions import GridTraceCountError
-from mdio.converters.type_converter import to_structured_type
-from mdio.core.config import MDIOSettings
-from mdio.core.grid import Grid
-from mdio.core.utils_write import MAX_COORDINATES_BYTES
-from mdio.core.utils_write import MAX_SIZE_LIVE_MASK
-from mdio.core.utils_write import get_constrained_chunksize
-from mdio.ingestion.grid_qc import grid_density_qc
-from mdio.ingestion.metadata import _add_grid_override_to_metadata
-from mdio.ingestion.segy.coordinates import _get_coordinates
-from mdio.ingestion.segy.coordinates import _get_spatial_coordinate_unit
-from mdio.ingestion.segy.coordinates import _populate_coordinates
-from mdio.ingestion.segy.coordinates import _update_template_units
-from mdio.ingestion.segy.file_headers import _add_segy_file_headers
-from mdio.ingestion.segy.validation import _validate_spec_in_template
-from mdio.segy import blocked_io
-from mdio.segy.file import get_segy_file_info
+from mdio.ingestion.segy.pipeline import run_segy_ingestion
 from mdio.segy.geometry import GridOverrides
-from mdio.segy.utilities import get_grid_plan
+
+logger = logging.getLogger(__name__)
 
 if TYPE_CHECKING:
     from pathlib import Path
     from typing import Any
 
-    from segy.arrays import HeaderArray as SegyHeaderArray
+    from segy.config import SegyHeaderOverrides
     from segy.schema import SegySpec
     from upath import UPath
-    from xarray import Dataset as xr_Dataset
 
-    from mdio.builder.schemas import Dataset
     from mdio.builder.templates.base import AbstractDatasetTemplate
-    from mdio.core.dimension import Dimension
-    from mdio.segy.file import SegyFileArguments
-    from mdio.segy.file import SegyFileInfo
-
-logger = logging.getLogger(__name__)
-
-
-def _patch_add_coordinates_for_non_binned(
-    template: AbstractDatasetTemplate,
-    non_binned_dims: set[str],
-) -> None:
-    """Patch template's _add_coordinates to skip adding non-binned dims as dimension coordinates.
-
-    When NonBinned override is used, dimensions like 'offset' or 'azimuth' become coordinates
-    instead of dimensions. However, template subclasses may still try to add them as 1D
-    dimension coordinates (e.g., with dimensions=("offset",)). Since 'offset' is no longer
-    a dimension, the builder substitutes 'trace', resulting in wrong coordinate dimensions.
-
-    This function patches the template's _add_coordinates method to intercept calls to
-    builder.add_coordinate and skip adding coordinates that are non-binned dims with
-    single-element dimension tuples. These coordinates will be added later by build_dataset
-    with the correct spatial_dimension_names (e.g., (inline, crossline, trace)).
-
-    Args:
-        template: The template to patch
-        non_binned_dims: Set of dimension names that became coordinates due to NonBinned override
-    """
-    # Check if already patched to avoid duplicate patching
-    if hasattr(template, "_mdio_non_binned_patched"):
-        return
-
-    # Store the original _add_coordinates method
-    original_add_coordinates = template._add_coordinates
-
-    def patched_add_coordinates() -> None:
-        """Wrapper that intercepts builder.add_coordinate calls for non-binned dims."""
-        # Store the original add_coordinate method from the builder
-        original_builder_add_coordinate = template._builder.add_coordinate
-
-        def filtered_add_coordinate(  # noqa: ANN202
-            name: str,
-            *,
-            dimensions: tuple[str, ...],
-            **kwargs,  # noqa: ANN003
-        ):
-            """Skip adding non-binned dims as 1D dimension coordinates."""
-            # Check if this is a non-binned dim being added as a 1D dimension coordinate
-            # (i.e., the coordinate name matches a non-binned dim and has only 1 dimension)
-            if name in non_binned_dims and len(dimensions) == 1:
-                logger.debug(
-                    "Skipping 1D coordinate '%s' with dims %s - will be added with full spatial dims",
-                    name,
-                    dimensions,
-                )
-                return template._builder  # Return builder for chaining, but don't add
-
-            # Otherwise, call the original method
-            return original_builder_add_coordinate(name, dimensions=dimensions, **kwargs)
-
-        # Temporarily replace builder's add_coordinate
-        template._builder.add_coordinate = filtered_add_coordinate
-
-        try:
-            # Call the original _add_coordinates
-            original_add_coordinates()
-        finally:
-            # Restore the original add_coordinate method
-            template._builder.add_coordinate = original_builder_add_coordinate
-
-    # Replace the template's _add_coordinates method
-    template._add_coordinates = patched_add_coordinates
-
-    # Mark as patched to prevent duplicate patching
-    template._mdio_non_binned_patched = True
-
-
-def _update_template_from_grid_overrides(
-    template: AbstractDatasetTemplate,
-    grid_overrides: GridOverrides | None,
-    segy_dimensions: list[Dimension],
-    full_chunk_shape: tuple[int, ...],
-    chunk_size: tuple[int, ...],
-) -> None:
-    """Update template attributes to match grid plan results after grid overrides.
-
-    This function modifies the template in-place to reflect changes from grid overrides:
-    - Updates chunk shape if it changed due to overrides
-    - Updates dimension names if they changed due to overrides
-    - Adds non-binned dimensions as coordinates for NonBinned override
-    - Patches _add_coordinates to skip adding non-binned dims as dimension coordinates
-
-    Args:
-        template: The template to update
-        grid_overrides: Grid override configuration
-        segy_dimensions: Dimensions returned from grid planning
-        full_chunk_shape: Original template chunk shape
-        chunk_size: Chunk size returned from grid planning
-    """
-    # Update template to match grid_plan results after grid overrides
-    # Extract actual spatial dimensions from segy_dimensions (excluding vertical dimension)
-    actual_spatial_dims = tuple(dim.name for dim in segy_dimensions[:-1])
-
-    # Align chunk_size with actual dimensions - truncate if dimensions were filtered out
-    num_actual_spatial = len(actual_spatial_dims)
-    num_chunk_spatial = len(chunk_size) - 1  # Exclude vertical dimension chunk
-    if num_actual_spatial != num_chunk_spatial:
-        # Truncate chunk_size to match actual dimensions
-        chunk_size = chunk_size[:num_actual_spatial] + (chunk_size[-1],)
-
-    if full_chunk_shape != chunk_size:
-        logger.debug(
-            "Adjusting template chunk shape from %s to %s to match grid after overrides",
-            full_chunk_shape,
-            chunk_size,
-        )
-        template._var_chunk_shape = chunk_size
-
-    # Update dimensions if they don't match grid_plan results
-    if template.spatial_dimension_names != actual_spatial_dims:
-        logger.debug(
-            "Adjusting template dimensions from %s to %s to match grid after overrides",
-            template.spatial_dimension_names,
-            actual_spatial_dims,
-        )
-        template._dim_names = actual_spatial_dims + (template.trace_domain,)
-        template._grid_overrides_applied = True
-
-    # If using NonBinned override, expose non-binned dims as logical coordinates on the template instance
-    # and patch _add_coordinates to skip adding them as 1D dimension coordinates
-    if grid_overrides is not None and grid_overrides.non_binned and grid_overrides.non_binned_dims:
-        non_binned_dims = tuple(grid_overrides.non_binned_dims)
-        logger.debug(
-            "NonBinned grid override: exposing non-binned dims as coordinates: %s",
-            non_binned_dims,
-        )
-        # Append any missing names; keep existing order and avoid duplicates
-        existing = set(template.coordinate_names)
-        to_add = tuple(n for n in non_binned_dims if n not in existing)
-        if to_add:
-            template._logical_coord_names = template._logical_coord_names + to_add
-            template._grid_overrides_applied = True
-
-        # Patch _add_coordinates to skip adding non-binned dims as 1D dimension coordinates
-        # This prevents them from being added with wrong dimensions (e.g., just "trace")
-        # They will be added later by build_dataset with full spatial_dimension_names
-        _patch_add_coordinates_for_non_binned(template, set(non_binned_dims))
-
-
-def _scan_for_headers(
-    segy_file_kwargs: SegyFileArguments,
-    segy_file_info: SegyFileInfo,
-    template: AbstractDatasetTemplate,
-    grid_overrides: GridOverrides | None = None,
-) -> tuple[list[Dimension], SegyHeaderArray]:
-    """Extract trace dimensions and index headers from the SEG-Y file.
-
-    This is an expensive operation.
-    It scans the SEG-Y file in chunks by using ProcessPoolExecutor.
-
-    Note:
-        If grid_overrides are applied to the template before calling this function,
-        the chunk_size returned from get_grid_plan should match the template's chunk shape.
-    """
-    full_chunk_shape = template.full_chunk_shape
-    segy_dimensions, chunk_size, segy_headers = get_grid_plan(
-        segy_file_kwargs=segy_file_kwargs,
-        segy_file_info=segy_file_info,
-        return_headers=True,
-        template=template,
-        chunksize=full_chunk_shape,
-        grid_overrides=grid_overrides,
-    )
-
-    _update_template_from_grid_overrides(
-        template=template,
-        grid_overrides=grid_overrides,
-        segy_dimensions=segy_dimensions,
-        full_chunk_shape=full_chunk_shape,
-        chunk_size=chunk_size,
-    )
-
-    return segy_dimensions, segy_headers
-
-
-def _build_and_check_grid(
-    segy_dimensions: list[Dimension],
-    segy_file_info: SegyFileInfo,
-    segy_headers: SegyHeaderArray,
-) -> Grid:
-    """Build and check the grid from the SEG-Y headers and dimensions.
-
-    Args:
-        segy_dimensions: List of of all SEG-Y dimensions to build grid from.
-        segy_file_info: SegyFileInfo instance containing the SEG-Y file information.
-        segy_headers: Headers read in from SEG-Y file for building the trace map.
-
-    Returns:
-        A grid instance populated with the dimensions and trace index map.
-
-    Raises:
-        GridTraceCountError: If number of traces in SEG-Y file does not match the parsed grid
-    """
-    grid = Grid(dims=segy_dimensions)
-    num_traces = segy_file_info.num_traces
-    grid_density_qc(grid, num_traces)
-    grid.build_map(segy_headers)
-    # Check grid validity by comparing trace numbers
-    if np.sum(grid.live_mask) != num_traces:
-        for dim_name in grid.dim_names:
-            dim_min, dim_max = grid.get_min(dim_name), grid.get_max(dim_name)
-            logger.warning("%s min: %s max: %s", dim_name, dim_min, dim_max)
-        logger.warning("Ingestion grid shape: %s.", grid.shape)
-        raise GridTraceCountError(np.sum(grid.live_mask), num_traces)
-    return grid
-
-
-def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
-    """Add raw headers capability to the MDIO template by monkey-patching its _add_variables method.
-
-    This function modifies the template's _add_variables method to also add a raw headers variable
-    with the following characteristics:
-    - Same rank as the Headers variable (all dimensions except vertical)
-    - Name: "RawHeaders"
-    - Type: ScalarType.HEADERS
-    - No coordinates
-    - zstd compressor
-    - No additional metadata
-    - Chunked the same as the Headers variable
-
-    Args:
-        mdio_template: The MDIO template to mutate
-    Returns:
-        The mutated MDIO template
-    """
-    # Check if raw headers enhancement has already been applied to avoid duplicate additions
-    if hasattr(mdio_template, "_mdio_raw_headers_enhanced"):
-        return mdio_template
-
-    # Store the original _add_variables method
-    original_add_variables = mdio_template._add_variables
-
-    def enhanced_add_variables() -> None:
-        # Call the original method first
-        original_add_variables()
-
-        # Now add the raw headers variable
-        chunk_shape = mdio_template.full_chunk_shape[:-1]
-
-        # Create chunk grid metadata
-        chunk_metadata = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))
-
-        # Add the raw headers variable using the builder's add_variable method
-        mdio_template._builder.add_variable(
-            name="raw_headers",
-            long_name="Raw Headers",
-            dimensions=mdio_template.spatial_dimension_names,
-            data_type=ScalarType.BYTES240,
-            compressor=Blosc(cname=BloscCname.zstd),
-            coordinates=None,  # No coordinates as specified
-            metadata=VariableMetadata(chunk_grid=chunk_metadata),
-        )
-
-    # Replace the template's _add_variables method
-    mdio_template._add_variables = enhanced_add_variables
-
-    # Mark the template as enhanced to prevent duplicate monkey-patching
-    mdio_template._mdio_raw_headers_enhanced = True
-
-    return mdio_template
-
-
-def _chunk_variable(ds: Dataset, target_variable_name: str) -> None:
-    """Determines and sets the chunking for a specific Variable in the Dataset."""
-    # Find variable index by name
-    index = next((i for i, obj in enumerate(ds.variables) if obj.name == target_variable_name), None)
-
-    def determine_target_size(var_type: str) -> int:
-        """Determines the target size (in bytes) for a Variable based on its type."""
-        if var_type == "bool":
-            return MAX_SIZE_LIVE_MASK
-        return MAX_COORDINATES_BYTES
-
-    # Create the chunk grid metadata
-    var_type = ds.variables[index].data_type
-    full_shape = tuple(dim.size for dim in ds.variables[index].dimensions)
-    target_size = determine_target_size(var_type)
-
-    chunk_shape = get_constrained_chunksize(full_shape, var_type, target_size)
-    chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))
-
-    # Create variable metadata if it doesn't exist
-    if ds.variables[index].metadata is None:
-        ds.variables[index].metadata = VariableMetadata()
-
-    ds.variables[index].metadata.chunk_grid = chunk_grid
 
 
 def _coerce_grid_overrides(
@@ -354,7 +27,7 @@ def _coerce_grid_overrides(
     """Normalize public ``grid_overrides`` input into a :class:`GridOverrides` model.
 
     The internal ingestion pipeline only accepts the typed model. A legacy ``dict`` is
-    converted via :meth:`GridOverrides.from_legacy_dict` and a deprecation message is logged.
+    converted and a deprecation message is logged.
     """
     if grid_overrides is None:
         return None
@@ -369,7 +42,7 @@ def _coerce_grid_overrides(
     return GridOverrides.model_validate(grid_overrides)
 
 
-def segy_to_mdio(  # noqa PLR0913
+def segy_to_mdio(  # noqa: PLR0913
     segy_spec: SegySpec,
     mdio_template: AbstractDatasetTemplate,
     input_path: UPath | Path | str,
@@ -391,101 +64,15 @@ def segy_to_mdio(  # noqa PLR0913
         grid_overrides: Option to add grid overrides. Prefer a :class:`mdio.GridOverrides`
             instance; ``dict`` is still accepted but emits a :class:`DeprecationWarning`.
         segy_header_overrides: Option to override specific SEG-Y headers during ingestion.
-
-    Raises:
-        FileExistsError: If the output location already exists and overwrite is False.
     """
     typed_grid_overrides = _coerce_grid_overrides(grid_overrides)
 
-    settings = MDIOSettings()
-
-    _validate_spec_in_template(segy_spec, mdio_template)
-
-    input_path = _normalize_path(input_path)
-    output_path = _normalize_path(output_path)
-
-    if not overwrite and output_path.exists():
-        err = f"Output location '{output_path.as_posix()}' exists. Set `overwrite=True` if intended."
-        raise FileExistsError(err)
-
-    segy_settings = SegyFileSettings(storage_options=input_path.storage_options)
-    segy_file_kwargs: SegyFileArguments = {
-        "url": input_path.as_posix(),
-        "spec": segy_spec,
-        "settings": segy_settings,
-        "header_overrides": segy_header_overrides,
-    }
-    segy_file_info = get_segy_file_info(segy_file_kwargs)
-
-    segy_dimensions, segy_headers = _scan_for_headers(
-        segy_file_kwargs,
-        segy_file_info,
-        template=mdio_template,
-        grid_overrides=typed_grid_overrides,
-    )
-    grid = _build_and_check_grid(segy_dimensions, segy_file_info, segy_headers)
-
-    _, non_dim_coords = _get_coordinates(grid, segy_headers, mdio_template)
-
-    # Explicitly delete segy_headers to free memory - coordinate values have been copied
-    del segy_headers
-
-    header_dtype = to_structured_type(segy_spec.trace.header.dtype)
-
-    if settings.raw_headers:
-        if zarr.config.get("default_zarr_format") == ZarrFormat.V2:
-            logger.warning("Raw headers are only supported for Zarr v3. Skipping raw headers.")
-        else:
-            logger.warning("MDIO__IMPORT__RAW_HEADERS is experimental and expected to change or be removed.")
-            mdio_template = _add_raw_headers_to_template(mdio_template)
-
-    spatial_unit = _get_spatial_coordinate_unit(segy_file_info)
-    mdio_template = _update_template_units(mdio_template, spatial_unit)
-    mdio_ds: Dataset = mdio_template.build_dataset(name=mdio_template.name, sizes=grid.shape, header_dtype=header_dtype)
-
-    _add_grid_override_to_metadata(dataset=mdio_ds, grid_overrides=typed_grid_overrides)
-
-    # Dynamically chunk the variables based on their type
-    _chunk_variable(ds=mdio_ds, target_variable_name="trace_mask")  # trace_mask is a Variable and not a Coordinate
-    for coord in mdio_template.coordinate_names:
-        _chunk_variable(ds=mdio_ds, target_variable_name=coord)
-
-    xr_dataset: xr_Dataset = to_xarray_dataset(mdio_ds=mdio_ds)
-
-    xr_dataset, drop_vars_delayed = _populate_coordinates(
-        dataset=xr_dataset,
-        grid=grid,
-        coords=non_dim_coords,
-        spatial_coordinate_scalar=segy_file_info.coordinate_scalar,
-    )
-
-    xr_dataset = _add_segy_file_headers(xr_dataset, segy_file_info)
-
-    xr_dataset.trace_mask.data[:] = grid.live_mask
-    # IMPORTANT: Do not drop the "trace_mask" here, as it will be used later in
-    # blocked_io.to_zarr() -> _workers.trace_worker()
-
-    # This will create the Zarr store with the correct structure but with empty arrays
-    to_mdio(xr_dataset, output_path=output_path, mode="w", compute=False)
-
-    # This will write the non-dimension coordinates and trace mask
-    # We also remove dimensions that don't have associated coordinates
-    unindexed_dims = [d for d in xr_dataset.dims if d not in xr_dataset.coords]
-    [drop_vars_delayed.remove(d) for d in unindexed_dims]
-    meta_ds = xr_dataset[drop_vars_delayed + ["trace_mask"]]
-    to_mdio(meta_ds, output_path=output_path, mode="r+", compute=True)
-
-    # Now we can drop them to simplify chunked write of the data variable
-    xr_dataset = xr_dataset.drop_vars(drop_vars_delayed)
-
-    # Write the headers and traces in chunks using grid_map to indicate dead traces
-    default_variable_name = mdio_template.default_variable_name
-    # This is an memory-expensive and time-consuming read-write operation
-    # performed in chunks to save the memory
-    blocked_io.to_zarr(
-        segy_file_kwargs=segy_file_kwargs,
+    return run_segy_ingestion(
+        segy_spec=segy_spec,
+        mdio_template=mdio_template,
+        input_path=input_path,
         output_path=output_path,
-        grid_map=grid.map,
-        dataset=xr_dataset,
-        data_variable_name=default_variable_name,
+        overwrite=overwrite,
+        grid_overrides=typed_grid_overrides,
+        segy_header_overrides=segy_header_overrides,
     )
diff --git a/src/mdio/ingestion/__init__.py b/src/mdio/ingestion/__init__.py
index 00baad73..6624f7e6 100644
--- a/src/mdio/ingestion/__init__.py
+++ b/src/mdio/ingestion/__init__.py
@@ -1 +1,19 @@
-"""MDIO ingestion helpers."""
+"""MDIO ingestion pipeline components.
+
+This is the advanced ingestion namespace. ``CoordinateSpec`` is intentionally not
+re-exported here; its canonical home is :mod:`mdio.builder.templates.types`.
+"""
+
+from mdio.ingestion.schema import DimensionSpec
+from mdio.ingestion.schema import ResolvedSchema
+from mdio.ingestion.segy.index_strategies import IndexStrategy
+from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
+from mdio.ingestion.segy.pipeline import run_segy_ingestion
+
+__all__ = [
+    "DimensionSpec",
+    "IndexStrategy",
+    "IndexStrategyRegistry",
+    "ResolvedSchema",
+    "run_segy_ingestion",
+]
diff --git a/src/mdio/ingestion/dataset_factory.py b/src/mdio/ingestion/dataset_factory.py
new file mode 100644
index 00000000..84d9b496
--- /dev/null
+++ b/src/mdio/ingestion/dataset_factory.py
@@ -0,0 +1,262 @@
+"""Schema-driven factory for MDIO datasets."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+from typing import Any
+
+from mdio.builder.dataset_builder import MDIODatasetBuilder
+from mdio.builder.schemas import compressors
+from mdio.builder.schemas.chunk_grid import RegularChunkGrid
+from mdio.builder.schemas.chunk_grid import RegularChunkShape
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.schemas.v1.variable import CoordinateMetadata
+from mdio.builder.schemas.v1.variable import VariableMetadata
+from mdio.core.utils_write import MAX_COORDINATES_BYTES
+from mdio.core.utils_write import MAX_SIZE_LIVE_MASK
+from mdio.core.utils_write import get_constrained_chunksize
+
+if TYPE_CHECKING:
+    from mdio.builder.schemas import Dataset
+    from mdio.builder.schemas.dtype import StructuredType
+    from mdio.builder.schemas.v1.units import AllUnitModel
+    from mdio.ingestion.schema import ResolvedSchema
+
+logger = logging.getLogger(__name__)
+
+
+def _chunk_variable(ds: Dataset, target_variable_name: str) -> None:
+    """Determines and sets the chunking for a specific Variable in the Dataset."""
+    index = next((i for i, obj in enumerate(ds.variables) if obj.name == target_variable_name), None)
+    if index is None:
+        return
+
+    def determine_target_size(var_type: str) -> int:
+        """Determines the target size (in bytes) for a Variable based on its type."""
+        if var_type == "bool":
+            return MAX_SIZE_LIVE_MASK
+        return MAX_COORDINATES_BYTES
+
+    var_type = ds.variables[index].data_type
+    full_shape = tuple(dim.size for dim in ds.variables[index].dimensions)
+    target_size = determine_target_size(var_type)
+
+    chunk_shape = get_constrained_chunksize(full_shape, var_type, target_size)
+    chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))
+
+    if ds.variables[index].metadata is None:
+        ds.variables[index].metadata = VariableMetadata()
+
+    ds.variables[index].metadata.chunk_grid = chunk_grid
+
+
+def _resolve_chunks(chunk_shape: tuple[int, ...], sizes: tuple[int, ...]) -> tuple[int, ...]:
+    """Resolve chunk shapes by substituting -1 with actual sizes.
+
+    Args:
+        chunk_shape: Configured chunk shape.
+        sizes: Actual sizes of each dimension.
+
+    Returns:
+        tuple[int, ...]: Resolved chunk shape.
+    """
+    return tuple(size if chunk_size == -1 else chunk_size for chunk_size, size in zip(chunk_shape, sizes, strict=True))
+
+
+def _create_dataset_builder(schema: ResolvedSchema) -> MDIODatasetBuilder:
+    """Create and initialize the MDIODatasetBuilder with attributes.
+
+    Args:
+        schema: Resolved schema.
+
+    Returns:
+        MDIODatasetBuilder: The initialized dataset builder.
+    """
+    attributes = dict(schema.metadata) if schema.metadata else {}
+    attributes["defaultVariableName"] = schema.default_variable_name
+
+    return MDIODatasetBuilder(name=schema.name, attributes=attributes)
+
+
+def _add_dimensions_and_coordinates(
+    builder: MDIODatasetBuilder,
+    schema: ResolvedSchema,
+    sizes: tuple[int, ...],
+    units: dict[str, AllUnitModel],
+) -> None:
+    """Add dimensions and coordinates to the builder.
+
+    Args:
+        builder: MDIO dataset builder.
+        schema: Resolved schema.
+        sizes: Actual sizes of each dimension.
+        units: Dictionary mapping coordinate/dimension names to AllUnitModel.
+
+    Raises:
+        ValueError: If a coordinate with the same name already exists but has different attributes.
+    """
+    for dim_spec, size in zip(schema.dimensions, sizes, strict=True):
+        builder.add_dimension(dim_spec.name, size)
+
+    for dim_spec in schema.dimensions:
+        if dim_spec.is_calculated and dim_spec.name != "trace":
+            continue
+        builder.add_coordinate(
+            name=dim_spec.name,
+            dimensions=(dim_spec.name,),
+            data_type=dim_spec.dtype,
+            metadata=CoordinateMetadata(units_v1=units.get(dim_spec.name)),
+        )
+
+    compressor = compressors.Blosc(cname=compressors.BloscCname.zstd)
+    for coord_spec in schema.coordinates:
+        try:
+            builder.add_coordinate(
+                name=coord_spec.name,
+                dimensions=coord_spec.dimensions,
+                data_type=coord_spec.dtype,
+                compressor=compressor,
+                metadata=CoordinateMetadata(units_v1=units.get(coord_spec.name)),
+            )
+        except ValueError as exc:
+            if "same name twice" not in str(exc):
+                raise
+
+
+def _add_trace_mask_and_headers(
+    builder: MDIODatasetBuilder,
+    schema: ResolvedSchema,
+    resolved_chunks: tuple[int, ...],
+    header_dtype: StructuredType | None,
+) -> None:
+    """Add trace mask and headers variables to the builder.
+
+    Args:
+        builder: MDIO dataset builder.
+        schema: Resolved schema.
+        resolved_chunks: Resolved chunk shapes.
+        header_dtype: Structured dtype for trace headers.
+    """
+    compressor = compressors.Blosc(cname=compressors.BloscCname.zstd)
+    spatial_dim_names = tuple(dim.name for dim in schema.dimensions if dim.is_spatial)
+    coordinate_names = [coord.name for coord in schema.coordinates]
+
+    builder.add_variable(
+        name="trace_mask",
+        dimensions=spatial_dim_names,
+        data_type=ScalarType.BOOL,
+        compressor=compressor,
+        coordinates=coordinate_names,
+    )
+
+    if header_dtype is not None:
+        chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=resolved_chunks[:-1]))
+        builder.add_variable(
+            name="headers",
+            dimensions=spatial_dim_names,
+            data_type=header_dtype,
+            compressor=compressor,
+            coordinates=coordinate_names,
+            metadata=VariableMetadata(chunk_grid=chunk_grid),
+        )
+
+
+def _add_main_and_extra_variables(
+    builder: MDIODatasetBuilder,
+    schema: ResolvedSchema,
+    resolved_chunks: tuple[int, ...],
+    units: dict[str, AllUnitModel],
+    extra_variables: list[dict[str, Any]],
+) -> None:
+    """Add main data and extra variables to the builder.
+
+    Args:
+        builder: MDIO dataset builder.
+        schema: Resolved schema.
+        resolved_chunks: Resolved chunk shapes.
+        units: Dictionary mapping coordinate/dimension names to AllUnitModel.
+        extra_variables: Optional list of additional variables.
+    """
+    compressor = compressors.Blosc(cname=compressors.BloscCname.zstd)
+    coordinate_names = [coord.name for coord in schema.coordinates]
+
+    chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=resolved_chunks))
+    builder.add_variable(
+        name=schema.default_variable_name,
+        dimensions=tuple(dim.name for dim in schema.dimensions),
+        data_type=ScalarType.FLOAT32,
+        compressor=compressor,
+        coordinates=coordinate_names,
+        metadata=VariableMetadata(chunk_grid=chunk_grid, units_v1=units.get(schema.default_variable_name)),
+    )
+
+    for var_dict in extra_variables:
+        builder.add_variable(**var_dict)
+
+
+def _apply_dynamic_chunking(ds: Dataset, coordinate_names: list[str]) -> None:
+    """Dynamically chunk trace_mask and coordinates based on their data types and sizes.
+
+    Args:
+        ds: The dataset model.
+        coordinate_names: List of coordinate names.
+    """
+    _chunk_variable(ds=ds, target_variable_name="trace_mask")
+    for coord_name in coordinate_names:
+        _chunk_variable(ds=ds, target_variable_name=coord_name)
+
+
+def build_mdio_dataset(
+    schema: ResolvedSchema,
+    sizes: tuple[int, ...],
+    header_dtype: StructuredType | None = None,
+    units: dict[str, AllUnitModel] | None = None,
+    extra_variables: list[dict[str, Any]] | None = None,
+) -> Dataset:
+    """Build an MDIO Dataset model purely from a ResolvedSchema and dimensions sizes.
+
+    Args:
+        schema: Resolved schema describing dimensions, coordinates, metadata, etc.
+        sizes: Actual sizes of each dimension.
+        header_dtype: Structured dtype for trace headers.
+        units: Dictionary mapping coordinate/dimension names to AllUnitModel.
+        extra_variables: Optional list of additional variables to add (e.g. raw_headers).
+
+    Returns:
+        Dataset: The completed Dataset schema model.
+    """
+    units = units or {}
+    extra_variables = extra_variables or []
+
+    resolved_chunks = _resolve_chunks(schema.chunk_shape, sizes)
+    builder = _create_dataset_builder(schema)
+
+    _add_dimensions_and_coordinates(
+        builder=builder,
+        schema=schema,
+        sizes=sizes,
+        units=units,
+    )
+
+    _add_trace_mask_and_headers(
+        builder=builder,
+        schema=schema,
+        resolved_chunks=resolved_chunks,
+        header_dtype=header_dtype,
+    )
+
+    _add_main_and_extra_variables(
+        builder=builder,
+        schema=schema,
+        resolved_chunks=resolved_chunks,
+        units=units,
+        extra_variables=extra_variables,
+    )
+
+    ds = builder.build()
+
+    coordinate_names = [coord.name for coord in schema.coordinates]
+    _apply_dynamic_chunking(ds=ds, coordinate_names=coordinate_names)
+
+    return ds
diff --git a/src/mdio/ingestion/metadata.py b/src/mdio/ingestion/metadata.py
index 4cc617f0..cd86535c 100644
--- a/src/mdio/ingestion/metadata.py
+++ b/src/mdio/ingestion/metadata.py
@@ -9,7 +9,7 @@
     from mdio.segy.geometry import GridOverrides
 
 
-def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: GridOverrides | None) -> None:
+def add_grid_override_to_metadata(dataset: Dataset, grid_overrides: GridOverrides | None) -> None:
     """Add grid override to Dataset metadata if needed."""
     if dataset.metadata.attributes is None:
         dataset.metadata.attributes = {}
diff --git a/src/mdio/ingestion/schema/__init__.py b/src/mdio/ingestion/schema/__init__.py
new file mode 100644
index 00000000..2d82ac07
--- /dev/null
+++ b/src/mdio/ingestion/schema/__init__.py
@@ -0,0 +1,20 @@
+"""Immutable schema models that describe a dataset before any data is scanned.
+
+``SchemaResolver`` lives in :mod:`mdio.ingestion.schema.resolver` and is imported from there
+directly to avoid a circular import (the resolver depends on the SEG-Y index-strategy
+registry, which in turn depends on these models).
+"""
+
+from mdio.ingestion.schema.models import CollapseToTraceEffect
+from mdio.ingestion.schema.models import DimensionSpec
+from mdio.ingestion.schema.models import InsertTraceDimEffect
+from mdio.ingestion.schema.models import ResolvedSchema
+from mdio.ingestion.schema.models import SchemaEffect
+
+__all__ = [
+    "CollapseToTraceEffect",
+    "DimensionSpec",
+    "InsertTraceDimEffect",
+    "ResolvedSchema",
+    "SchemaEffect",
+]
diff --git a/src/mdio/ingestion/schema/models.py b/src/mdio/ingestion/schema/models.py
new file mode 100644
index 00000000..cce198f9
--- /dev/null
+++ b/src/mdio/ingestion/schema/models.py
@@ -0,0 +1,227 @@
+"""Immutable schema models that fully describe a dataset before any data is scanned."""
+
+from __future__ import annotations
+
+from abc import ABC
+from abc import abstractmethod
+from typing import TYPE_CHECKING
+from typing import Any
+
+from pydantic import BaseModel
+from pydantic import Field
+
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.templates.types import CoordinateSpec  # noqa: TC001  (pydantic needs this at runtime)
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
+class DimensionSpec(BaseModel):
+    """Specification for a dimension in the final dataset.
+
+    Attributes:
+        name: Dimension name (e.g. ``"inline"``, ``"shot_point"``, ``"trace"``, ``"time"``).
+        is_spatial: Whether this is a spatial dimension. ``False`` only for the vertical
+            (data-domain) dimension.
+        is_calculated: Whether this dimension's coordinate values are produced by an index
+            strategy at ingest time (e.g. ``shot_index`` from a template, or ``trace`` added
+            by a grid override) rather than read directly. The pipeline uses this to give a
+            clear error if a required strategy was not enabled.
+        dtype: Data type for the dimension coordinate.
+    """
+
+    name: str
+    is_spatial: bool = True
+    is_calculated: bool = False
+    dtype: ScalarType = ScalarType.INT32
+
+
+class ResolvedSchema(BaseModel):
+    """Final resolved schema for dataset ingestion.
+
+    This represents the complete, resolved schema after applying template configuration
+    and grid overrides. It contains everything needed to build the dataset without
+    any further decision-making.
+
+    Attributes:
+        name: Name of the dataset/template
+        dimensions: Ordered list of dimension specifications
+        coordinates: List of coordinate specifications
+        chunk_shape: Tuple of chunk sizes for each dimension
+        metadata: Additional metadata attributes
+        default_variable_name: Name of the main data variable
+    """
+
+    name: str
+    dimensions: list[DimensionSpec]
+    coordinates: list[CoordinateSpec]
+    chunk_shape: tuple[int, ...]
+    metadata: dict[str, Any] = Field(default_factory=dict)
+    default_variable_name: str = "amplitude"
+
+    def required_header_fields(self) -> set[str]:
+        """Names that must be readable from SEG-Y trace headers to materialize this schema."""
+        fields = {dim.name for dim in self.dimensions if dim.is_spatial and not dim.is_calculated}
+        fields.update(coord.name for coord in self.coordinates)
+        # coordinate_scalar is always needed to scale X/Y coordinates.
+        fields.add("coordinate_scalar")
+        return fields
+
+    def spatial_dimensions(self) -> list[DimensionSpec]:
+        """Get only spatial dimensions (excludes vertical/trace domain)."""
+        return [dim for dim in self.dimensions if dim.is_spatial]
+
+    def missing_calculated_dimensions(self, produced_names: Iterable[str]) -> list[str]:
+        """Return calculated spatial dimensions that no index strategy produced.
+
+        Calculated dimensions (e.g. ``shot_index``, or ``trace`` from a grid override) are
+        not read directly from SEG-Y headers; an index strategy must materialize them. The
+        pipeline uses this to fail clearly when the required strategy was not enabled.
+
+        Args:
+            produced_names: Names of dimensions actually produced after header analysis.
+
+        Returns:
+            Calculated spatial dimension names absent from ``produced_names``.
+        """
+        produced = set(produced_names)
+        return [
+            dim.name for dim in self.dimensions if dim.is_spatial and dim.is_calculated and dim.name not in produced
+        ]
+
+
+_TRACE_DIM = "trace"
+
+
+class SchemaEffect(ABC):
+    """How a trace-producing index strategy reshapes a :class:`ResolvedSchema`.
+
+    A grid override changes both the trace headers (handled by an
+    :class:`~mdio.ingestion.segy.index_strategies.IndexStrategy`) and the resolved schema
+    layout (dimensions, chunk shape, coordinates). To keep those two views from drifting,
+    the index-strategy registry is the single place that maps a ``GridOverrides`` to the
+    matching :class:`SchemaEffect`; the :class:`~mdio.ingestion.schema.resolver.SchemaResolver`
+    simply applies whatever effect it is handed.
+    """
+
+    @abstractmethod
+    def apply(self, schema: ResolvedSchema) -> ResolvedSchema:
+        """Return a new schema with this effect applied; ``schema`` is left unchanged."""
+
+
+class InsertTraceDimEffect(SchemaEffect):
+    """Insert a calculated ``trace`` dimension just before the vertical axis.
+
+    Mirrors the ``HasDuplicates`` override: no spatial dimension is collapsed, a single
+    extra ``trace`` axis disambiguates duplicate index tuples.
+
+    Args:
+        chunksize: Chunk size assigned to the inserted ``trace`` dimension.
+    """
+
+    def __init__(self, chunksize: int = 1) -> None:
+        self.chunksize = chunksize
+
+    def apply(self, schema: ResolvedSchema) -> ResolvedSchema:
+        """Insert the ``trace`` dimension and its chunk before the vertical dimension."""
+        new_dimensions: list[DimensionSpec] = []
+        new_chunk_shape: list[int] = []
+        for dim, chunk in zip(schema.dimensions, schema.chunk_shape, strict=True):
+            if dim.is_spatial:
+                new_dimensions.append(dim)
+                new_chunk_shape.append(chunk)
+                continue
+            new_dimensions.append(DimensionSpec(name=_TRACE_DIM, is_spatial=True, is_calculated=True))
+            new_chunk_shape.append(self.chunksize)
+            new_dimensions.append(dim)
+            new_chunk_shape.append(chunk)
+
+        return schema.model_copy(update={"dimensions": new_dimensions, "chunk_shape": tuple(new_chunk_shape)})
+
+
+class CollapseToTraceEffect(SchemaEffect):
+    """Collapse selected spatial dimensions into a single ``trace`` dimension.
+
+    Mirrors the ``NonBinned`` override. The collapsed dimensions are re-expressed as
+    non-dimension coordinates over ``trace``, and any coordinate that referenced a
+    collapsed dimension is rewritten to depend on ``trace`` instead.
+
+    Args:
+        chunksize: Chunk size assigned to the inserted ``trace`` dimension.
+        collapse_dims: Names of spatial dimensions to collapse. ``None`` collapses every
+            spatial dimension except the first; an empty tuple collapses nothing.
+    """
+
+    def __init__(self, chunksize: int | None, collapse_dims: tuple[str, ...] | None = None) -> None:
+        self.chunksize = chunksize
+        self.collapse_dims = collapse_dims
+
+    def _resolve_collapse_dims(self, schema: ResolvedSchema) -> tuple[str, ...]:
+        """Resolve the effective set of dimension names to collapse for ``schema``."""
+        if self.collapse_dims is not None:
+            return self.collapse_dims
+        spatial = schema.spatial_dimensions()
+        return tuple(dim.name for dim in spatial[1:]) if len(spatial) > 1 else ()
+
+    def apply(self, schema: ResolvedSchema) -> ResolvedSchema:
+        """Collapse the configured dimensions into ``trace`` and rewrite coordinates."""
+        collapse = self._resolve_collapse_dims(schema)
+        collapse_set = set(collapse)
+
+        new_dimensions: list[DimensionSpec] = []
+        new_chunk_shape: list[int] = []
+        replaced_count = 0
+        for dim, chunk in zip(schema.dimensions, schema.chunk_shape, strict=True):
+            if dim.name in collapse_set:
+                replaced_count += 1
+                continue
+            if dim.is_spatial:
+                new_dimensions.append(dim)
+                new_chunk_shape.append(chunk)
+                continue
+            if replaced_count > 0:
+                new_dimensions.append(DimensionSpec(name=_TRACE_DIM, is_spatial=True, is_calculated=True))
+                new_chunk_shape.append(self.chunksize)
+            new_dimensions.append(dim)
+            new_chunk_shape.append(chunk)
+
+        new_coordinates = self._rewrite_coordinates(schema, collapse, collapse_set, replaced_count)
+
+        return schema.model_copy(
+            update={
+                "dimensions": new_dimensions,
+                "coordinates": new_coordinates,
+                "chunk_shape": tuple(new_chunk_shape),
+            }
+        )
+
+    @staticmethod
+    def _rewrite_coordinates(
+        schema: ResolvedSchema,
+        collapse: tuple[str, ...],
+        collapse_set: set[str],
+        replaced_count: int,
+    ) -> list[CoordinateSpec]:
+        """Rewrite coordinate dimensions and append collapsed dims as ``trace`` coordinates."""
+        rewritten: list[CoordinateSpec] = []
+        for coord in schema.coordinates:
+            had_collapsed = any(name in collapse_set for name in coord.dimensions)
+            dims = tuple(name for name in coord.dimensions if name not in collapse_set)
+            if had_collapsed and replaced_count > 0:
+                dims = (*dims, _TRACE_DIM)
+            rewritten.append(coord.model_copy(update={"dimensions": dims}))
+
+        existing = {coord.name for coord in rewritten}
+        dtype_by_dim = {dim.name: dim.dtype for dim in schema.dimensions}
+        for name in collapse:
+            if name in existing:
+                continue
+            rewritten.append(
+                CoordinateSpec(
+                    name=name,
+                    dimensions=(_TRACE_DIM,),
+                    dtype=dtype_by_dim.get(name, ScalarType.INT32),
+                )
+            )
+        return rewritten
diff --git a/src/mdio/ingestion/schema/resolver.py b/src/mdio/ingestion/schema/resolver.py
new file mode 100644
index 00000000..8dffd12e
--- /dev/null
+++ b/src/mdio/ingestion/schema/resolver.py
@@ -0,0 +1,88 @@
+"""Schema resolution: turn a template + grid overrides into a final, ingestion-ready schema."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.ingestion.schema.models import DimensionSpec
+from mdio.ingestion.schema.models import ResolvedSchema
+from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
+
+if TYPE_CHECKING:
+    from mdio.builder.templates.base import AbstractDatasetTemplate
+    from mdio.segy.geometry import GridOverrides
+
+
+class SchemaResolver:
+    """Resolves template + grid overrides into a final schema.
+
+    This class takes a template and optional grid overrides and produces a
+    :class:`ResolvedSchema` that completely specifies the dataset structure before any data
+    is scanned or processed. All override semantics (which dimensions collapse, where a
+    ``trace`` dimension is inserted) are owned by
+    :class:`~mdio.ingestion.segy.index_strategies.IndexStrategyRegistry`; this resolver only
+    applies the resulting :class:`~mdio.ingestion.schema.models.SchemaEffect`.
+    """
+
+    def __init__(self) -> None:
+        self._registry = IndexStrategyRegistry()
+
+    def resolve(
+        self,
+        template: AbstractDatasetTemplate,
+        grid_overrides: GridOverrides | None = None,
+    ) -> ResolvedSchema:
+        """Resolve template and overrides into final schema.
+
+        Args:
+            template: The MDIO dataset template.
+            grid_overrides: Optional grid override configuration.
+
+        Returns:
+            ResolvedSchema with all dimensions, coordinates, and metadata resolved.
+        """
+        schema = self._template_to_schema(template)
+
+        if not grid_overrides:
+            return schema
+
+        # Grid-override provenance is attached to the dataset at assembly time
+        # (mdio.ingestion.metadata.add_grid_override_to_metadata); the resolver only
+        # reshapes the schema, keeping it override-mechanics-only.
+        effect = self._registry.schema_effect(grid_overrides)
+        if effect is not None:
+            schema = effect.apply(schema)
+
+        return schema
+
+    def _template_to_schema(self, template: AbstractDatasetTemplate) -> ResolvedSchema:
+        """Convert a template to a resolved schema without overrides."""
+        calculated = set(template.calculated_dimension_names)
+        dim_dtypes = template.declare_dimension_specs()
+        dimensions = [
+            DimensionSpec(
+                name=name,
+                is_spatial=True,
+                is_calculated=name in calculated,
+                dtype=dim_dtypes.get(name, ScalarType.INT32),
+            )
+            for name in template.spatial_dimension_names
+        ]
+        vertical_name = template.dimension_names[-1]
+        dimensions.append(
+            DimensionSpec(
+                name=vertical_name,
+                is_spatial=False,
+                dtype=dim_dtypes.get(vertical_name, ScalarType.INT32),
+            )
+        )
+
+        return ResolvedSchema(
+            name=template.name,
+            dimensions=dimensions,
+            coordinates=list(template.declare_coordinate_specs()),
+            chunk_shape=template.full_chunk_shape,
+            metadata=template._load_dataset_attributes() or {},
+            default_variable_name=template.default_variable_name,
+        )
diff --git a/src/mdio/ingestion/schema_resolver.py b/src/mdio/ingestion/schema_resolver.py
deleted file mode 100644
index f28da1df..00000000
--- a/src/mdio/ingestion/schema_resolver.py
+++ /dev/null
@@ -1,211 +0,0 @@
-"""Schema resolution: turn a template + grid overrides into a final, ingestion-ready schema."""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-from typing import Any
-
-from pydantic import BaseModel
-from pydantic import Field
-
-from mdio.builder.templates.types import CoordinateSpec  # noqa: TC001  (pydantic needs this at runtime)
-
-if TYPE_CHECKING:
-    from mdio.builder.templates.base import AbstractDatasetTemplate
-    from mdio.segy.geometry import GridOverrides
-
-
-class DimensionSpec(BaseModel):
-    """Specification for a dimension in the final dataset.
-
-    Attributes:
-        name: Dimension name (e.g. ``"inline"``, ``"shot_point"``, ``"trace"``, ``"time"``).
-        is_spatial: Whether this is a spatial dimension. ``False`` only for the vertical
-            (data-domain) dimension.
-        is_calculated: Whether this dimension's coordinate values are produced by an index
-            strategy at ingest time (e.g. ``shot_index`` from a template, or ``trace`` added
-            by a grid override) rather than read directly. The pipeline uses this to give a
-            clear error if a required strategy was not enabled.
-    """
-
-    name: str
-    is_spatial: bool = True
-    is_calculated: bool = False
-
-
-class ResolvedSchema(BaseModel):
-    """Final resolved schema for dataset ingestion.
-
-    This represents the complete, resolved schema after applying template configuration
-    and grid overrides. It contains everything needed to build the dataset without
-    any further decision-making.
-
-    Attributes:
-        name: Name of the dataset/template
-        dimensions: Ordered list of dimension specifications
-        coordinates: List of coordinate specifications
-        chunk_shape: Tuple of chunk sizes for each dimension
-        metadata: Additional metadata attributes
-        default_variable_name: Name of the main data variable
-    """
-
-    name: str
-    dimensions: list[DimensionSpec]
-    coordinates: list[CoordinateSpec]
-    chunk_shape: tuple[int, ...]
-    metadata: dict[str, Any] = Field(default_factory=dict)
-    default_variable_name: str = "amplitude"
-
-    def required_header_fields(self) -> set[str]:
-        """Names that must be readable from SEG-Y trace headers to materialize this schema."""
-        fields = {dim.name for dim in self.dimensions if dim.is_spatial and not dim.is_calculated}
-        fields.update(coord.name for coord in self.coordinates)
-        # coordinate_scalar is always needed to scale X/Y coordinates.
-        fields.add("coordinate_scalar")
-        return fields
-
-    def spatial_dimensions(self) -> list[DimensionSpec]:
-        """Get only spatial dimensions (excludes vertical/trace domain)."""
-        return [dim for dim in self.dimensions if dim.is_spatial]
-
-
-class SchemaResolver:
-    """Resolves template + grid overrides into a final schema.
-
-    This class takes a template and optional grid overrides and produces
-    a ResolvedSchema that completely specifies the dataset structure before
-    any data is scanned or processed.
-    """
-
-    def resolve(
-        self,
-        template: AbstractDatasetTemplate,
-        grid_overrides: GridOverrides | None = None,
-    ) -> ResolvedSchema:
-        """Resolve template and overrides into final schema.
-
-        Args:
-            template: The MDIO dataset template
-            grid_overrides: Optional grid override configuration
-
-        Returns:
-            ResolvedSchema with all dimensions, coordinates, and metadata resolved
-        """
-        schema = self._template_to_schema(template)
-
-        if grid_overrides:
-            schema = self._apply_override_transformations(schema, grid_overrides)
-
-        return schema
-
-    def _template_to_schema(self, template: AbstractDatasetTemplate) -> ResolvedSchema:
-        """Convert a template to a resolved schema without overrides."""
-        calculated = set(template.calculated_dimension_names)
-        dimensions = [
-            DimensionSpec(name=name, is_spatial=True, is_calculated=name in calculated)
-            for name in template.spatial_dimension_names
-        ]
-        dimensions.append(DimensionSpec(name=template.dimension_names[-1], is_spatial=False))
-
-        return ResolvedSchema(
-            name=template.name,
-            dimensions=dimensions,
-            coordinates=list(template.declare_coordinate_specs()),
-            chunk_shape=template.full_chunk_shape,
-            metadata=template._load_dataset_attributes() or {},
-            default_variable_name=template.default_variable_name,
-        )
-
-    def _apply_override_transformations(
-        self,
-        schema: ResolvedSchema,
-        grid_overrides: GridOverrides,
-    ) -> ResolvedSchema:
-        """Apply grid override transformations to the schema."""
-        schema_dict = schema.model_dump()
-
-        if grid_overrides.non_binned:
-            schema_dict = self._apply_non_binned_transform(schema_dict, grid_overrides)
-        elif grid_overrides.has_duplicates:
-            schema_dict = self._apply_duplicate_transform(schema_dict)
-
-        schema_dict["metadata"]["gridOverrides"] = grid_overrides.to_legacy_dict()
-
-        return ResolvedSchema(**schema_dict)
-
-    def _apply_non_binned_transform(
-        self,
-        schema_dict: dict,
-        grid_overrides: GridOverrides,
-    ) -> dict:
-        """Replace selected spatial dimensions with a single ``trace`` dimension."""
-        dimensions = schema_dict["dimensions"]
-        chunk_shape = list(schema_dict["chunk_shape"])
-
-        replace_dims = grid_overrides.non_binned_dims
-        if replace_dims is None:
-            # Default: replace all spatial dims except the first.
-            spatial_dims = [d for d in dimensions if d["is_spatial"]]
-            replace_dims = [d["name"] for d in spatial_dims[1:]] if len(spatial_dims) > 1 else []
-
-        new_dimensions = []
-        new_chunk_shape = []
-        replaced_count = 0
-
-        for i, dim in enumerate(dimensions):
-            if dim["name"] in replace_dims:
-                replaced_count += 1
-                continue
-            if dim["is_spatial"]:
-                new_dimensions.append(dim)
-                new_chunk_shape.append(chunk_shape[i])
-            else:
-                if replaced_count > 0:
-                    new_dimensions.append(DimensionSpec(name="trace", is_spatial=True, is_calculated=True).model_dump())
-                    new_chunk_shape.append(grid_overrides.chunksize)
-                new_dimensions.append(dim)
-                new_chunk_shape.append(chunk_shape[i])
-
-        schema_dict["dimensions"] = new_dimensions
-        schema_dict["chunk_shape"] = tuple(new_chunk_shape)
-
-        # Rewrite coordinate dimension references: collapsed dims drop out, replaced by ``trace``.
-        replaced_dims_set = set(replace_dims)
-        updated_coordinates = []
-        for coord in schema_dict["coordinates"]:
-            original_dims = coord["dimensions"]
-            had_collapsed_dims = any(d in replaced_dims_set for d in original_dims)
-            coord_dims = [d for d in original_dims if d not in replaced_dims_set]
-
-            if had_collapsed_dims and replaced_count > 0:
-                coord_dims.append("trace")
-
-            updated_coord = dict(coord)
-            updated_coord["dimensions"] = tuple(coord_dims)
-            updated_coordinates.append(updated_coord)
-        schema_dict["coordinates"] = updated_coordinates
-
-        return schema_dict
-
-    def _apply_duplicate_transform(self, schema_dict: dict) -> dict:
-        """Insert a ``trace`` dimension with chunksize 1 before the vertical dimension."""
-        dimensions = schema_dict["dimensions"]
-        chunk_shape = list(schema_dict["chunk_shape"])
-
-        new_dimensions = []
-        new_chunk_shape = []
-
-        for i, dim in enumerate(dimensions):
-            if dim["is_spatial"]:
-                new_dimensions.append(dim)
-                new_chunk_shape.append(chunk_shape[i])
-            else:
-                new_dimensions.append(DimensionSpec(name="trace", is_spatial=True, is_calculated=True).model_dump())
-                new_chunk_shape.append(1)
-                new_dimensions.append(dim)
-                new_chunk_shape.append(chunk_shape[i])
-
-        schema_dict["dimensions"] = new_dimensions
-        schema_dict["chunk_shape"] = tuple(new_chunk_shape)
-
-        return schema_dict
diff --git a/src/mdio/ingestion/segy/coordinates.py b/src/mdio/ingestion/segy/coordinates.py
index 9f49f837..783f7095 100644
--- a/src/mdio/ingestion/segy/coordinates.py
+++ b/src/mdio/ingestion/segy/coordinates.py
@@ -5,10 +5,10 @@
 import logging
 from typing import TYPE_CHECKING
 
-import numpy as np
 from segy.standards.codes import MeasurementSystem as SegyMeasurementSystem
 from segy.standards.fields import binary as binary_header_fields
 
+from mdio.builder.schemas.v1.units import AllUnitModel
 from mdio.builder.schemas.v1.units import AngleUnitEnum
 from mdio.builder.schemas.v1.units import AngleUnitModel
 from mdio.builder.schemas.v1.units import LengthUnitEnum
@@ -21,7 +21,6 @@
     from xarray import Dataset as xr_Dataset
 
     from mdio.builder.templates.base import AbstractDatasetTemplate
-    from mdio.core.dimension import Dimension
     from mdio.core.grid import Grid
     from mdio.segy.file import SegyFileInfo
 
@@ -41,33 +40,7 @@
 ]
 
 
-def _get_coordinates(
-    grid: Grid,
-    segy_headers: SegyHeaderArray,
-    mdio_template: AbstractDatasetTemplate,
-) -> tuple[list[Dimension], dict[str, SegyHeaderArray]]:
-    """Get the data dim and non-dim coordinates from the SEG-Y headers and MDIO template.
-
-    Select a subset of the segy_dimensions that corresponds to the MDIO dimensions
-    The dimensions are ordered as in the MDIO template.
-    The last dimension is always the vertical domain dimension
-
-    Args:
-        grid: Inferred MDIO grid for SEG-Y file.
-        segy_headers: Headers read in from SEG-Y file.
-        mdio_template: The MDIO template to use for the conversion.
-
-    Returns:
-        A tuple containing:
-            - A list of dimension coordinates (1-D arrays).
-            - A dict of non-dimension coordinates (str: N-D arrays).
-    """
-    dimensions_coords = [grid.select_dim(name) for name in mdio_template.dimension_names]
-    non_dim_coords = {name: np.array(segy_headers[name]) for name in mdio_template.coordinate_names}
-    return dimensions_coords, non_dim_coords
-
-
-def _populate_coordinates(
+def populate_coordinates(
     dataset: xr_Dataset,
     grid: Grid,
     coords: dict[str, SegyHeaderArray],
@@ -100,7 +73,7 @@ def _populate_coordinates(
     return dataset, drop_vars_delayed
 
 
-def _get_spatial_coordinate_unit(segy_file_info: SegyFileInfo) -> LengthUnitModel | None:
+def get_spatial_coordinate_unit(segy_file_info: SegyFileInfo) -> LengthUnitModel | None:
     """Get the coordinate unit from the SEG-Y headers."""
     measurement_system_code = int(segy_file_info.binary_header_dict[MEASUREMENT_SYSTEM_KEY])
 
@@ -121,21 +94,26 @@ def _get_spatial_coordinate_unit(segy_file_info: SegyFileInfo) -> LengthUnitMode
     return LengthUnitModel(length=unit)
 
 
-def _update_template_units(template: AbstractDatasetTemplate, unit: LengthUnitModel | None) -> AbstractDatasetTemplate:
-    """Update the template with dynamic and some pre-defined units."""
-    new_units = {key: AngleUnitModel(angle=AngleUnitEnum.DEGREES) for key in ANGLE_UNIT_KEYS}
-
-    if unit is None:
-        template.add_units(new_units)
-        return template
-
-    for key in SPATIAL_UNIT_KEYS:
-        current_value = template.get_unit_by_key(key)
-        if current_value is not None:
-            logger.warning("Unit for %s already in template. Will keep the original unit: %s", key, current_value)
-            continue
-
-        new_units[key] = unit
-
-    template.add_units(new_units)
-    return template
+def resolve_units(
+    template: AbstractDatasetTemplate,
+    unit: LengthUnitModel | None,
+) -> dict[str, AllUnitModel]:
+    """Resolve the units for the template and dynamic SEG-Y units without mutating the template."""
+    resolved = dict(template.units)
+
+    for key in ANGLE_UNIT_KEYS:
+        if key not in resolved:
+            resolved[key] = AngleUnitModel(angle=AngleUnitEnum.DEGREES)
+
+    if unit is not None:
+        for key in SPATIAL_UNIT_KEYS:
+            if key not in resolved:
+                resolved[key] = unit
+            else:
+                logger.warning(
+                    "Unit for %s already in template. Will keep the original unit: %s",
+                    key,
+                    resolved[key],
+                )
+
+    return resolved
diff --git a/src/mdio/ingestion/segy/file_headers.py b/src/mdio/ingestion/segy/file_headers.py
index fe69116f..5c056b81 100644
--- a/src/mdio/ingestion/segy/file_headers.py
+++ b/src/mdio/ingestion/segy/file_headers.py
@@ -22,7 +22,7 @@
 logger = logging.getLogger(__name__)
 
 
-def _add_segy_file_headers(xr_dataset: xr_Dataset, segy_file_info: SegyFileInfo) -> xr_Dataset:
+def add_segy_file_headers(xr_dataset: xr_Dataset, segy_file_info: SegyFileInfo) -> xr_Dataset:
     """Attach the SEG-Y text and binary file headers as attrs on a scalar variable."""
     settings = MDIOSettings()
     mode = settings.save_segy_file_header
diff --git a/src/mdio/ingestion/index_strategies.py b/src/mdio/ingestion/segy/index_strategies.py
similarity index 91%
rename from src/mdio/ingestion/index_strategies.py
rename to src/mdio/ingestion/segy/index_strategies.py
index ede35e39..6e80b29f 100644
--- a/src/mdio/ingestion/index_strategies.py
+++ b/src/mdio/ingestion/segy/index_strategies.py
@@ -21,6 +21,8 @@
 from numpy.lib import recfunctions as rfn
 
 from mdio.core import Dimension
+from mdio.ingestion.schema.models import CollapseToTraceEffect
+from mdio.ingestion.schema.models import InsertTraceDimEffect
 from mdio.ingestion.segy.header_analysis import ShotGunGeometryType
 from mdio.ingestion.segy.header_analysis import StreamerShotGeometryType
 from mdio.ingestion.segy.header_analysis import analyze_lines_for_guns
@@ -35,6 +37,7 @@
     from segy.arrays import HeaderArray
 
     from mdio.builder.templates.base import AbstractDatasetTemplate
+    from mdio.ingestion.schema.models import SchemaEffect
     from mdio.segy.geometry import GridOverrides
 
 logger = logging.getLogger(__name__)
@@ -351,7 +354,35 @@ def compute_dimensions(self, headers: HeaderArray, dim_names: tuple[str, ...]) -
 
 
 class IndexStrategyRegistry:
-    """Picks the right :class:`IndexStrategy` from grid overrides + template hints."""
+    """Picks the right :class:`IndexStrategy` from grid overrides + template hints.
+
+    The registry is the single source of truth for override semantics: it maps a
+    :class:`~mdio.segy.geometry.GridOverrides` both to the header-transforming
+    :class:`IndexStrategy` (:meth:`create_strategy`) and to the schema-reshaping
+    :class:`~mdio.ingestion.schema.models.SchemaEffect` (:meth:`schema_effect`). Keeping
+    both derivations here prevents the header view and the schema view from drifting.
+    """
+
+    def schema_effect(self, grid_overrides: GridOverrides | None) -> SchemaEffect | None:
+        """Return the schema reshaping implied by ``grid_overrides``, if any.
+
+        Only ``non_binned`` and ``has_duplicates`` change the dimension/coordinate layout;
+        every other override transforms headers in place without reshaping the schema.
+
+        Args:
+            grid_overrides: Typed grid override configuration, or ``None``.
+
+        Returns:
+            The matching :class:`SchemaEffect`, or ``None`` when no layout change applies.
+        """
+        if not grid_overrides:
+            return None
+        if grid_overrides.non_binned:
+            collapse_dims = None if grid_overrides.non_binned_dims is None else tuple(grid_overrides.non_binned_dims)
+            return CollapseToTraceEffect(chunksize=grid_overrides.chunksize, collapse_dims=collapse_dims)
+        if grid_overrides.has_duplicates:
+            return InsertTraceDimEffect(chunksize=1)
+        return None
 
     def create_strategy(
         self,
diff --git a/src/mdio/ingestion/segy/pipeline.py b/src/mdio/ingestion/segy/pipeline.py
new file mode 100644
index 00000000..8dd69706
--- /dev/null
+++ b/src/mdio/ingestion/segy/pipeline.py
@@ -0,0 +1,194 @@
+"""Ingestion pipeline for SEG-Y to MDIO."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import numpy as np
+from segy.config import SegyFileSettings
+
+from mdio.api.io import _normalize_path
+from mdio.converters.exceptions import GridTraceCountError
+from mdio.converters.type_converter import to_structured_type
+from mdio.core.grid import Grid
+from mdio.ingestion.dataset_factory import build_mdio_dataset
+from mdio.ingestion.grid_qc import grid_density_qc
+from mdio.ingestion.metadata import add_grid_override_to_metadata
+from mdio.ingestion.schema.resolver import SchemaResolver
+from mdio.ingestion.segy.coordinates import get_spatial_coordinate_unit
+from mdio.ingestion.segy.coordinates import resolve_units
+from mdio.ingestion.segy.raw_headers import build_raw_header_variables
+from mdio.ingestion.segy.reader import read_index_headers
+from mdio.ingestion.segy.serializer import serialize_to_mdio
+from mdio.ingestion.segy.validation import validate_spec_in_template
+from mdio.segy.file import get_segy_file_info
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from segy.config import SegyHeaderOverrides
+    from segy.schema import SegySpec
+    from upath import UPath
+
+    from mdio.builder.templates.base import AbstractDatasetTemplate
+    from mdio.core.dimension import Dimension
+    from mdio.ingestion.schema import ResolvedSchema
+    from mdio.segy.file import SegyFileArguments
+    from mdio.segy.geometry import GridOverrides
+
+logger = logging.getLogger(__name__)
+
+
+def _resolve_output_path(output_path: UPath | Path | str, overwrite: bool) -> UPath:
+    """Normalize the output path and enforce the overwrite policy.
+
+    Args:
+        output_path: Requested output location.
+        overwrite: Whether an existing location may be overwritten.
+
+    Returns:
+        The normalized output path.
+
+    Raises:
+        FileExistsError: If the location exists and ``overwrite`` is False.
+    """
+    output_path = _normalize_path(output_path)
+    if not overwrite and output_path.exists():
+        err = f"Output location '{output_path.as_posix()}' exists. Set `overwrite=True` if intended."
+        raise FileExistsError(err)
+    return output_path
+
+
+def _verify_calculated_dimensions(
+    schema: ResolvedSchema,
+    dimensions: list[Dimension],
+    template_name: str,
+) -> None:
+    """Ensure every calculated dimension required by the schema was produced.
+
+    Args:
+        schema: The resolved schema.
+        dimensions: Dimensions produced by header analysis.
+        template_name: Template name, used only for the error message.
+
+    Raises:
+        ValueError: If a required calculated dimension is missing.
+    """
+    missing = schema.missing_calculated_dimensions(dim.name for dim in dimensions)
+    if missing:
+        err = (
+            f"Required computed fields {sorted(missing)} for template {template_name} not found "
+            f"after grid overrides. Please ensure the correct grid overrides are applied."
+        )
+        raise ValueError(err)
+
+
+def _build_grid(dimensions: list[Dimension], indexed_headers: np.ndarray, num_traces: int) -> Grid:
+    """Build the ingestion grid, run density QC, and verify the live trace count.
+
+    Args:
+        dimensions: Ordered spatial + vertical dimensions.
+        indexed_headers: Transformed trace headers used to build the grid map.
+        num_traces: Number of traces reported by the SEG-Y file.
+
+    Returns:
+        The built and validated grid.
+
+    Raises:
+        GridTraceCountError: If the live trace count does not match ``num_traces``.
+    """
+    grid = Grid(dims=dimensions)
+    grid_density_qc(grid, num_traces)
+    grid.build_map(indexed_headers)
+
+    live_trace_count = int(np.sum(grid.live_mask))
+    if live_trace_count != num_traces:
+        for dim_name in grid.dim_names:
+            logger.warning("%s min: %s max: %s", dim_name, grid.get_min(dim_name), grid.get_max(dim_name))
+        logger.warning("Ingestion grid shape: %s.", grid.shape)
+        raise GridTraceCountError(live_trace_count, num_traces)
+
+    return grid
+
+
+def run_segy_ingestion(  # noqa: PLR0913
+    segy_spec: SegySpec,
+    mdio_template: AbstractDatasetTemplate,
+    input_path: UPath | Path | str,
+    output_path: UPath | Path | str,
+    overwrite: bool = False,
+    grid_overrides: GridOverrides | None = None,
+    segy_header_overrides: SegyHeaderOverrides | None = None,
+) -> None:
+    """Convert SEG-Y to MDIO.
+
+    Pipeline phases: schema resolution, header analysis, index strategy, grid build,
+    dataset build, data write.
+
+    Args:
+        segy_spec: The SEG-Y specification to use for the conversion.
+        mdio_template: The MDIO template to use for the conversion.
+        input_path: The universal path of the input SEG-Y file.
+        output_path: The universal path for the output MDIO v1 file.
+        overwrite: Whether to overwrite the output file if it already exists. Defaults to False.
+        grid_overrides: Grid override configuration for non-standard geometries.
+        segy_header_overrides: Option to override specific SEG-Y headers during ingestion.
+
+    Raises:
+        FileExistsError: If the output location already exists and overwrite is False.
+        ValueError: If required fields are missing from segy_spec or required computed
+            dimensions are not produced after grid overrides are applied.
+        GridTraceCountError: If the live trace count in the built grid does not match
+            the number of traces reported by the SEG-Y file.
+    """
+    validate_spec_in_template(segy_spec, mdio_template)
+
+    input_path = _normalize_path(input_path)
+    output_path = _resolve_output_path(output_path, overwrite)
+
+    segy_file_kwargs: SegyFileArguments = {
+        "url": input_path.as_posix(),
+        "spec": segy_spec,
+        "settings": SegyFileSettings(storage_options=input_path.storage_options),
+        "header_overrides": segy_header_overrides,
+    }
+    segy_file_info = get_segy_file_info(segy_file_kwargs)
+
+    spatial_unit = get_spatial_coordinate_unit(segy_file_info)
+    units = resolve_units(mdio_template, spatial_unit)
+
+    schema = SchemaResolver().resolve(mdio_template, grid_overrides)
+
+    indexed_headers, dimensions = read_index_headers(
+        segy_file_kwargs=segy_file_kwargs,
+        file_info=segy_file_info,
+        schema=schema,
+        grid_overrides=grid_overrides,
+        synthesize_dims=mdio_template.synthesize_missing_dims,
+        template=mdio_template,
+    )
+    _verify_calculated_dimensions(schema, dimensions, mdio_template.name)
+
+    grid = _build_grid(dimensions, indexed_headers, segy_file_info.num_traces)
+
+    header_dtype = to_structured_type(segy_spec.trace.header.dtype)
+    extra_variables = build_raw_header_variables(schema)
+    mdio_ds = build_mdio_dataset(
+        schema=schema,
+        sizes=grid.shape,
+        header_dtype=header_dtype,
+        units=units,
+        extra_variables=extra_variables,
+    )
+    add_grid_override_to_metadata(dataset=mdio_ds, grid_overrides=grid_overrides)
+
+    serialize_to_mdio(
+        mdio_ds=mdio_ds,
+        grid=grid,
+        indexed_headers=indexed_headers,
+        schema=schema,
+        file_info=segy_file_info,
+        segy_file_kwargs=segy_file_kwargs,
+        output_path=output_path,
+    )
diff --git a/src/mdio/ingestion/segy/raw_headers.py b/src/mdio/ingestion/segy/raw_headers.py
new file mode 100644
index 00000000..fea93562
--- /dev/null
+++ b/src/mdio/ingestion/segy/raw_headers.py
@@ -0,0 +1,64 @@
+"""Experimental raw trace-header ingestion, isolated for one-file removal.
+
+The ``MDIO__IMPORT__RAW_HEADERS`` feature is experimental and expected to change or be
+removed. All of its gating and variable construction lives here so that retiring it is a
+single-file delete plus removing the call site in :mod:`mdio.ingestion.segy.pipeline`.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import zarr
+
+from mdio.builder.schemas.chunk_grid import RegularChunkGrid
+from mdio.builder.schemas.chunk_grid import RegularChunkShape
+from mdio.builder.schemas.compressors import Blosc
+from mdio.builder.schemas.compressors import BloscCname
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.schemas.v1.variable import VariableMetadata
+from mdio.constants import ZarrFormat
+from mdio.core.config import MDIOSettings
+
+if TYPE_CHECKING:
+    from typing import Any
+
+    from mdio.ingestion.schema import ResolvedSchema
+
+logger = logging.getLogger(__name__)
+
+
+def build_raw_header_variables(schema: ResolvedSchema) -> list[dict[str, Any]]:
+    """Return extra-variable specs for the experimental raw-headers feature.
+
+    Args:
+        schema: The resolved schema; its spatial dimensions and chunk shape size the
+            ``raw_headers`` variable.
+
+    Returns:
+        A single-element list describing the ``raw_headers`` variable when the feature is
+        enabled and the active Zarr format supports it (v3); otherwise an empty list.
+    """
+    settings = MDIOSettings()
+    if not settings.raw_headers:
+        return []
+
+    if zarr.config.get("default_zarr_format") == ZarrFormat.V2:
+        logger.warning("Raw headers are only supported for Zarr v3. Skipping raw headers.")
+        return []
+
+    logger.warning("MDIO__IMPORT__RAW_HEADERS is experimental and expected to change or be removed.")
+    spatial_dim_names = tuple(dim.name for dim in schema.dimensions if dim.is_spatial)
+    chunk_metadata = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=schema.chunk_shape[:-1]))
+    return [
+        {
+            "name": "raw_headers",
+            "long_name": "Raw Headers",
+            "dimensions": spatial_dim_names,
+            "data_type": ScalarType.BYTES240,
+            "compressor": Blosc(cname=BloscCname.zstd),
+            "coordinates": None,
+            "metadata": VariableMetadata(chunk_grid=chunk_metadata),
+        }
+    ]
diff --git a/src/mdio/ingestion/segy/reader.py b/src/mdio/ingestion/segy/reader.py
new file mode 100644
index 00000000..9acf4b30
--- /dev/null
+++ b/src/mdio/ingestion/segy/reader.py
@@ -0,0 +1,83 @@
+"""SEG-Y index header reader and grid dimension builder."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from mdio.core.dimension import Dimension
+from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
+from mdio.segy.parsers import parse_headers
+
+if TYPE_CHECKING:
+    import numpy as np
+
+    from mdio.builder.templates.base import AbstractDatasetTemplate
+    from mdio.ingestion.schema import ResolvedSchema
+    from mdio.segy.file import SegyFileArguments
+    from mdio.segy.file import SegyFileInfo
+    from mdio.segy.geometry import GridOverrides
+
+logger = logging.getLogger(__name__)
+
+
+def read_index_headers(  # noqa: PLR0913
+    segy_file_kwargs: SegyFileArguments,
+    file_info: SegyFileInfo,
+    schema: ResolvedSchema,
+    grid_overrides: GridOverrides | None,
+    synthesize_dims: tuple[str, ...],
+    template: AbstractDatasetTemplate | None = None,
+) -> tuple[np.ndarray, list[Dimension]]:
+    """Parse SEG-Y headers, apply index strategy transformations, and build dimensions.
+
+    Args:
+        segy_file_kwargs: Arguments for opening the SEG-Y file.
+        file_info: Metadata info of the SEG-Y file.
+        schema: Final resolved MDIO schema.
+        grid_overrides: Grid override parameters if any.
+        synthesize_dims: Dimensions to synthesize if missing from headers.
+        template: Optional dataset template for specialized strategy resolution.
+
+    Returns:
+        tuple:
+            - np.ndarray: The transformed and indexed trace headers.
+            - list[Dimension]: The completed list of spatial + vertical Dimensions.
+    """
+    # 1. Determine subset of header fields to parse
+    spec = segy_file_kwargs.get("spec")
+    spec_fields = {field.name for field in spec.trace.header.fields} if spec else set()
+
+    # Drop any synthesized or missing dimensions/coordinates that aren't in the physical file spec
+    subset = tuple(f for f in schema.required_header_fields() if f in spec_fields)
+
+    # 2. Parse headers
+    parsed_headers = parse_headers(
+        segy_file_kwargs=segy_file_kwargs,
+        num_traces=file_info.num_traces,
+        subset=subset,
+    )
+
+    # 3. Apply Index Strategy
+    strategy = IndexStrategyRegistry().create_strategy(
+        grid_overrides=grid_overrides,
+        synthesize_dims=synthesize_dims,
+        template=template,
+    )
+    logger.info("Using index strategy: %s", strategy.name)
+
+    indexed_headers = strategy.transform_headers(parsed_headers)
+
+    # 4. Compute spatial dimensions
+    dim_names = tuple(d.name for d in schema.dimensions if d.is_spatial)
+    dimensions = strategy.compute_dimensions(indexed_headers, dim_names)
+
+    # 5. Append vertical dimension
+    sample_labels = file_info.sample_labels / 1000  # normalize
+    if all(sample_labels.astype("int64") == sample_labels):
+        sample_labels = sample_labels.astype("int64")
+
+    vertical_dim_name = schema.dimensions[-1].name
+    dimensions.append(Dimension(coords=sample_labels, name=vertical_dim_name))
+
+    return indexed_headers, dimensions
diff --git a/src/mdio/ingestion/segy/serializer.py b/src/mdio/ingestion/segy/serializer.py
new file mode 100644
index 00000000..037563af
--- /dev/null
+++ b/src/mdio/ingestion/segy/serializer.py
@@ -0,0 +1,101 @@
+"""Serializer that formats the populated xarray dataset and writes it to MDIO / Zarr."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from mdio.api.io import to_mdio
+from mdio.builder.xarray_builder import to_xarray_dataset
+from mdio.ingestion.segy.coordinates import populate_coordinates
+from mdio.ingestion.segy.file_headers import add_segy_file_headers
+from mdio.segy import blocked_io
+
+if TYPE_CHECKING:
+    from upath import UPath
+
+    from mdio.builder.schemas import Dataset
+    from mdio.core.grid import Grid
+    from mdio.ingestion.schema import ResolvedSchema
+    from mdio.segy.file import SegyFileArguments
+    from mdio.segy.file import SegyFileInfo
+
+logger = logging.getLogger(__name__)
+
+
+def serialize_to_mdio(  # noqa: PLR0913
+    mdio_ds: Dataset,
+    grid: Grid,
+    indexed_headers: np.ndarray,
+    schema: ResolvedSchema,
+    file_info: SegyFileInfo,
+    segy_file_kwargs: SegyFileArguments,
+    output_path: UPath,
+) -> None:
+    """Serialize the constructed MDIO Dataset and populate it with trace data.
+
+    This function coordinates coordinate population, trace mask writing, file headers,
+    staged to_mdio Zarr writes, and the final blocked-I/O writing.
+
+    Args:
+        mdio_ds: The constructed Dataset schema model.
+        grid: The built ingestion Grid.
+        indexed_headers: Transformed trace headers.
+        schema: Resolved schema of the dataset.
+        file_info: Metadata info of the SEG-Y file.
+        segy_file_kwargs: SEG-Y file arguments.
+        output_path: Universal output path for writing.
+    """
+    # 1. Extract non-dimension coordinates from transformed headers
+    non_dim_coords = {}
+    for coord in schema.coordinates:
+        if coord.name in indexed_headers.dtype.names:
+            non_dim_coords[coord.name] = np.array(indexed_headers[coord.name])
+        else:
+            logger.warning(
+                "Coordinate '%s' expected by schema but not found in transformed headers.",
+                coord.name,
+            )
+
+    # 2. Convert to xarray dataset
+    xr_dataset = to_xarray_dataset(mdio_ds=mdio_ds)
+
+    # 3. Populate coordinates with grid mappings
+    xr_dataset, drop_vars_delayed = populate_coordinates(
+        dataset=xr_dataset,
+        grid=grid,
+        coords=non_dim_coords,
+        spatial_coordinate_scalar=file_info.coordinate_scalar,
+    )
+
+    # 4. Attach SEG-Y text and binary file headers
+    xr_dataset = add_segy_file_headers(xr_dataset, file_info)
+
+    # 5. Populate trace mask
+    xr_dataset.trace_mask.data[:] = grid.live_mask
+
+    # 6. Initialize Zarr store with metadata (staged empty arrays)
+    to_mdio(xr_dataset, output_path=output_path, mode="w", compute=False)
+
+    # 7. Write delayed coordinate variables and trace mask
+    unindexed_dims = [d for d in xr_dataset.dims if d not in xr_dataset.coords]
+    for d in unindexed_dims:
+        if d in drop_vars_delayed:
+            drop_vars_delayed.remove(d)
+
+    meta_ds = xr_dataset[drop_vars_delayed + ["trace_mask"]]
+    to_mdio(meta_ds, output_path=output_path, mode="r+", compute=True)
+
+    # Drop delayed vars to prepare for blocked-I/O writing
+    xr_dataset = xr_dataset.drop_vars(drop_vars_delayed)
+
+    # 8. Write bulk trace data and structured headers via blocked-I/O workers
+    blocked_io.to_zarr(
+        segy_file_kwargs=segy_file_kwargs,
+        output_path=output_path,
+        grid_map=grid.map,
+        dataset=xr_dataset,
+        data_variable_name=schema.default_variable_name,
+    )
diff --git a/src/mdio/ingestion/segy/validation.py b/src/mdio/ingestion/segy/validation.py
index 1c74c96f..f9a6d079 100644
--- a/src/mdio/ingestion/segy/validation.py
+++ b/src/mdio/ingestion/segy/validation.py
@@ -10,7 +10,7 @@
     from mdio.builder.templates.base import AbstractDatasetTemplate
 
 
-def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatasetTemplate) -> None:
+def validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatasetTemplate) -> None:
     """Validate that the SegySpec has all required fields in the MDIO template."""
     # Import here to avoid circular imports at module load time
     from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate  # noqa: PLC0415
diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py
index c2b80b80..aa24270a 100644
--- a/src/mdio/segy/geometry.py
+++ b/src/mdio/segy/geometry.py
@@ -1,9 +1,9 @@
-"""SEG-Y grid override configuration model and legacy executor shim.
+"""SEG-Y grid override configuration model and template-compatibility helpers.
 
 The Pydantic :class:`GridOverrides` model is the supported public API for configuring
-grid overrides. The :class:`GridOverrider` class is retained as a thin shim that
-delegates to :class:`mdio.ingestion.index_strategies.IndexStrategyRegistry`; it preserves
-the v1.1 ``run(...)`` contract for callers that still pass a legacy ``dict``.
+grid overrides. Header transformation and schema reshaping are owned by
+:class:`mdio.ingestion.segy.index_strategies.IndexStrategyRegistry`; this module only holds
+the typed config plus the template-compatibility guards used to validate override pairings.
 """
 
 from __future__ import annotations
@@ -16,15 +16,7 @@
 from pydantic import ConfigDict
 from pydantic import Field
 
-from mdio.ingestion.index_strategies import IndexStrategyRegistry
-from mdio.segy.exceptions import GridOverrideMissingParameterError
-from mdio.segy.exceptions import GridOverrideUnknownError
-
 if TYPE_CHECKING:
-    from collections.abc import Sequence
-
-    from segy.arrays import HeaderArray
-
     from mdio.builder.templates.base import AbstractDatasetTemplate
 
 
@@ -142,101 +134,3 @@ def _validate_template_for_overrides(
             actual = type(template).__name__ if template is not None else "None"
             msg = f"calculate_shot_index only supports Seismic3DObnReceiverGathersTemplate, got {actual}."
             raise TypeError(msg)
-
-
-class GridOverrider:
-    """Legacy facade that adapts the dict-based v1.1 API onto :class:`IndexStrategyRegistry`.
-
-    Existing callers (notably :func:`mdio.segy.utilities.get_grid_plan`) still build a
-    legacy ``dict`` of grid overrides and call :meth:`run`. This class translates the dict
-    into a typed :class:`GridOverrides`, dispatches to the appropriate
-    :class:`IndexStrategy`, and returns the ``(headers, names, chunksize)`` tuple shape
-    those callers depend on. It will be removed once all callers move to the typed API.
-    """
-
-    def __init__(self) -> None:
-        self._registry = IndexStrategyRegistry()
-
-    def run(
-        self,
-        index_headers: HeaderArray,
-        index_names: Sequence[str],
-        grid_overrides: dict[str, Any] | None,
-        chunksize: Sequence[int] | None = None,
-        template: AbstractDatasetTemplate | None = None,
-    ) -> tuple[HeaderArray, tuple[str, ...], tuple[int, ...] | None]:
-        """Run the configured grid overrides and return updated headers/names/chunks.
-
-        Args:
-            index_headers: Parsed SEG-Y trace headers; structured numpy array.
-            index_names: Names of the index dimensions before any override is applied.
-            grid_overrides: Legacy dict of overrides (CamelCase keys).
-            chunksize: Optional chunk shape that may be expanded by overrides that add a
-                ``trace`` dimension.
-            template: Optional dataset template; used to identify coordinate fields and
-                to drive component synthesis for OBN.
-
-        Returns:
-            Tuple of ``(transformed_headers, new_index_names, new_chunksize)``. The
-            chunksize tuple is ``None`` when the caller did not pass a chunksize.
-
-        Raises:
-            GridOverrideUnknownError: When ``grid_overrides`` contains an unknown key.
-            GridOverrideMissingParameterError: When ``NonBinned`` is enabled without
-                ``chunksize`` or ``non_binned_dims``.
-
-        Notes:
-            Header-precondition checks (``GridOverrideKeysError``) are delegated to
-            :meth:`IndexStrategy.validate_headers`; template-compatibility checks
-            (``TypeError``) are delegated to :func:`_validate_template_for_overrides`.
-        """
-        grid_overrides = grid_overrides or {}
-
-        field_names = set(GridOverrides.model_fields.keys())
-        aliases = {field.alias for field in GridOverrides.model_fields.values() if field.alias}
-        valid_keys = field_names | aliases
-        for key in grid_overrides:
-            if key not in valid_keys:
-                raise GridOverrideUnknownError(key)
-
-        config = GridOverrides.model_validate(grid_overrides)
-
-        if config.non_binned:
-            missing: set[str] = set()
-            if config.chunksize is None:
-                missing.add("chunksize")
-            if not config.non_binned_dims:
-                missing.add("non_binned_dims")
-            if missing:
-                command = "NonBinned"
-                raise GridOverrideMissingParameterError(command, missing)
-
-        _validate_template_for_overrides(config, template)
-
-        synthesize_dims = _resolve_synthesize_dims(template)
-        strategy = self._registry.create_strategy(
-            grid_overrides=config,
-            synthesize_dims=synthesize_dims,
-            template=template,
-        )
-        logger.debug("Selected grid override strategy: %s", strategy.name)
-
-        strategy.validate_headers(index_headers)
-        new_headers = strategy.transform_headers(index_headers)
-
-        new_names = list(index_names)
-        new_chunks = list(chunksize) if chunksize is not None else None
-
-        # Both NonBinned and HasDuplicates add a 'trace' dim at index -1; HasDuplicates
-        # always uses chunksize 1, NonBinned uses the user-supplied value.
-        if config.non_binned or config.has_duplicates:
-            new_names.append("trace")
-            if new_chunks is not None:
-                inserted_chunk = config.chunksize if config.non_binned else 1
-                new_chunks.insert(-1, inserted_chunk)
-
-        return (
-            new_headers,
-            tuple(new_names),
-            tuple(new_chunks) if new_chunks is not None else None,
-        )
diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py
index 64c89584..02be321d 100644
--- a/src/mdio/segy/utilities.py
+++ b/src/mdio/segy/utilities.py
@@ -9,187 +9,16 @@
 import numpy as np
 from dask.array.core import normalize_chunks
 
-from mdio.core import Dimension
-from mdio.segy.geometry import GridOverrider
-from mdio.segy.parsers import parse_headers
-
 if TYPE_CHECKING:
     from dask.array import Array as DaskArray
     from numpy.typing import DTypeLike
     from numpy.typing import NDArray
-    from segy.arrays import HeaderArray
     from segy.schema import SegySpec
 
-    from mdio.builder.templates.base import AbstractDatasetTemplate
-    from mdio.segy.file import SegyFileArguments
-    from mdio.segy.file import SegyFileInfo
-    from mdio.segy.geometry import GridOverrides
 
 logger = logging.getLogger(__name__)
 
 
-def get_grid_plan(  # noqa:  C901, PLR0912, PLR0913, PLR0915
-    segy_file_kwargs: SegyFileArguments,
-    segy_file_info: SegyFileInfo,
-    chunksize: tuple[int, ...] | None,
-    template: AbstractDatasetTemplate,
-    return_headers: bool = False,
-    grid_overrides: GridOverrides | None = None,
-) -> tuple[list[Dimension], tuple[int, ...]] | tuple[list[Dimension], tuple[int, ...], HeaderArray]:
-    """Infer dimension ranges, and increments.
-
-    Generates multiple dimensions with the following steps:
-    1. Read index headers
-    2. Get min, max, and increments
-    3. Create `Dimension` with appropriate range, index, and description.
-    4. Create `Dimension` for sample axis using binary header.
-
-    Args:
-        segy_file_kwargs: SEG-Y file arguments.
-        segy_file_info: SegyFileInfo instance containing the num_traces and sample_labels.
-        chunksize:  Chunk sizes to be used in grid plan.
-        template: MDIO template where coordinate names and domain will be taken.
-        return_headers: Option to return parsed headers with `Dimension` objects. Default is False.
-        grid_overrides: Typed grid override configuration, or ``None`` for no overrides.
-
-    Returns:
-        All index dimensions and chunksize or dimensions and chunksize together with header values.
-
-    Raises:
-        ValueError: If computed fields are not found after grid overrides.
-    """
-    # Keep only dimension and non-dimension coordinates excluding the vertical axis
-    horizontal_dimensions = template.spatial_dimension_names
-    horizontal_coordinates = horizontal_dimensions + template.coordinate_names
-    # Exclude calculated dimensions - they don't exist in SEG-Y headers
-    calculated_dims = set(template.calculated_dimension_names)
-
-    # Remove any to be computed fields - preserve order by using list comprehension instead of set operations
-    computed_fields = set(template.calculated_dimension_names)
-    horizontal_coordinates = tuple(c for c in horizontal_coordinates if c not in computed_fields)
-
-    # Ensure non_binned_dims are included in the headers to parse, even if not in template
-    if grid_overrides is not None and grid_overrides.non_binned_dims:
-        for dim in grid_overrides.non_binned_dims:
-            if dim not in horizontal_coordinates:
-                horizontal_coordinates = horizontal_coordinates + (dim,)
-
-    # For OBN template: skip 'component' if not in SEG-Y spec (will be synthesized later)
-    # Import here to avoid circular imports at module load time
-    from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate  # noqa: PLC0415
-
-    spec = segy_file_kwargs.get("spec")
-    spec_fields = {field.name for field in spec.trace.header.fields} if spec else set()
-    fields_to_skip = calculated_dims.copy()
-
-    if isinstance(template, Seismic3DObnReceiverGathersTemplate) and "component" not in spec_fields:
-        fields_to_skip.add("component")
-
-    headers_subset = parse_headers(
-        segy_file_kwargs=segy_file_kwargs,
-        num_traces=segy_file_info.num_traces,
-        subset=tuple(c for c in horizontal_coordinates if c not in fields_to_skip),
-    )
-
-    # The legacy GridOverrider still consumes the dict shape; dump only at this boundary.
-    # Future PR will replace GridOverrider.
-    override_handler = GridOverrider()
-    headers_subset, horizontal_coordinates, chunksize = override_handler.run(
-        headers_subset,
-        horizontal_coordinates,
-        chunksize=chunksize,
-        grid_overrides=grid_overrides.to_legacy_dict() if grid_overrides is not None else {},
-        template=template,
-    )
-
-    # After grid overrides, determine final spatial dimensions and their chunk sizes
-    non_binned_active = grid_overrides is not None and grid_overrides.non_binned
-    non_binned_dims: set[str] = (
-        set(grid_overrides.non_binned_dims)
-        if non_binned_active and grid_overrides is not None and grid_overrides.non_binned_dims
-        else set()
-    )
-
-    # Create mapping from dimension name to original chunk size for easy lookup
-    original_spatial_dims = list(template.spatial_dimension_names)
-    original_chunks = list(template.full_chunk_shape[:-1])  # Exclude vertical (sample/time) dimension
-    dim_to_chunk = dict(zip(original_spatial_dims, original_chunks, strict=True))
-
-    # Final spatial dimensions: keep trace and original dims, exclude non-binned dims
-    final_spatial_dims = []
-    final_spatial_chunks = []
-    for name in horizontal_coordinates:
-        if name in non_binned_dims:
-            continue  # Skip dimensions that became coordinates
-        if name == "trace":
-            chunk_val = (
-                int(grid_overrides.chunksize)
-                if non_binned_active and grid_overrides is not None and grid_overrides.chunksize is not None
-                else 1
-            )
-            final_spatial_dims.append(name)
-            final_spatial_chunks.append(chunk_val)
-        elif name in dim_to_chunk:
-            # Use original chunk size for known dimensions
-            final_spatial_dims.append(name)
-            final_spatial_chunks.append(dim_to_chunk[name])
-
-    if len(computed_fields) > 0 and not computed_fields.issubset(headers_subset.dtype.names):
-        err = (
-            f"Required computed fields {sorted(computed_fields)} for template {template.name} "
-            f"not found after grid overrides. Please ensure correct overrides are applied."
-        )
-        raise ValueError(err)
-
-    # Create dimensions from final_spatial_dims plus any computed fields that were added by grid overrides
-    all_dimension_names = list(final_spatial_dims)
-    added_computed_fields = []
-    for computed_field in computed_fields:
-        if computed_field in headers_subset.dtype.names and computed_field not in all_dimension_names:
-            # Insert in template order
-            if computed_field in template.spatial_dimension_names:
-                insert_idx = template.spatial_dimension_names.index(computed_field)
-                # Find position in all_dimension_names that corresponds to this template position
-                actual_idx = min(insert_idx, len(all_dimension_names))
-                all_dimension_names.insert(actual_idx, computed_field)
-                # Track where we inserted and what chunk size it should have
-                template_chunk_idx = template.spatial_dimension_names.index(computed_field)
-                chunk_val = template.full_chunk_shape[template_chunk_idx]
-                added_computed_fields.append((actual_idx, chunk_val))
-            else:
-                all_dimension_names.append(computed_field)
-                added_computed_fields.append((len(all_dimension_names) - 1, 1))
-
-    # Build chunksize including chunks for computed fields
-    if added_computed_fields:
-        chunk_list = list(final_spatial_chunks)
-        for insert_idx, chunk_val in sorted(added_computed_fields, reverse=True):
-            chunk_list.insert(insert_idx, chunk_val)
-        chunksize = tuple(chunk_list + [template.full_chunk_shape[-1]])
-    else:
-        chunksize = tuple(final_spatial_chunks + [template.full_chunk_shape[-1]])
-
-    dimensions = []
-    for dim_name in all_dimension_names:
-        if dim_name not in headers_subset.dtype.names:
-            continue
-        dim_unique = np.unique(headers_subset[dim_name])
-        dimensions.append(Dimension(coords=dim_unique, name=dim_name))
-
-    sample_labels = segy_file_info.sample_labels / 1000  # normalize
-
-    if all(sample_labels.astype("int64") == sample_labels):
-        sample_labels = sample_labels.astype("int64")
-
-    vertical_dim = Dimension(coords=sample_labels, name=template.trace_domain)
-    dimensions.append(vertical_dim)
-
-    if return_headers:
-        return dimensions, chunksize, headers_subset
-
-    return dimensions, chunksize
-
-
 def project_headers_to_segy_spec(headers: DaskArray, segy_spec: SegySpec) -> DaskArray:
     """Project stored MDIO trace headers onto the SegySpec trace header layout.
 
diff --git a/tests/unit/ingestion/test_dataset_factory.py b/tests/unit/ingestion/test_dataset_factory.py
new file mode 100644
index 00000000..cb32d925
--- /dev/null
+++ b/tests/unit/ingestion/test_dataset_factory.py
@@ -0,0 +1,128 @@
+"""Tests for the schema-driven ``build_mdio_dataset`` factory."""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.schemas.v1.units import LengthUnitEnum
+from mdio.builder.schemas.v1.units import LengthUnitModel
+from mdio.builder.templates.types import CoordinateSpec
+from mdio.converters.type_converter import to_structured_type
+from mdio.ingestion.dataset_factory import build_mdio_dataset
+from mdio.ingestion.schema import DimensionSpec
+from mdio.ingestion.schema import ResolvedSchema
+
+
+def _vars_by_name(dataset) -> dict:  # noqa: ANN001
+    """Index a built Dataset's variables (and coordinates) by name."""
+    return {var.name: var for var in dataset.variables}
+
+
+def _dim_names(variable) -> tuple[str, ...]:  # noqa: ANN001
+    """Return the ordered dimension names of a built Variable."""
+    return tuple(dim.name for dim in variable.dimensions)
+
+
+def _chunk_shape(variable) -> tuple[int, ...]:  # noqa: ANN001
+    """Return the configured chunk shape of a built Variable."""
+    return tuple(variable.metadata.chunk_grid.configuration.chunk_shape)
+
+
+@pytest.fixture
+def basic_schema() -> ResolvedSchema:
+    """A minimal 2-spatial-dim + vertical schema with one non-dim coordinate."""
+    return ResolvedSchema(
+        name="Toy3D",
+        dimensions=[
+            DimensionSpec(name="inline", is_spatial=True, dtype=ScalarType.INT32),
+            DimensionSpec(name="crossline", is_spatial=True, dtype=ScalarType.INT32),
+            DimensionSpec(name="time", is_spatial=False, dtype=ScalarType.INT16),
+        ],
+        coordinates=[
+            CoordinateSpec(name="cdp_x", dimensions=("inline", "crossline"), dtype=ScalarType.FLOAT64),
+        ],
+        chunk_shape=(2, -1, 4),
+        default_variable_name="amplitude",
+    )
+
+
+class TestBuildMdioDataset:
+    """Tests for the structure produced by ``build_mdio_dataset``."""
+
+    def test_builds_dimensions_coordinates_and_data_variable(self, basic_schema: ResolvedSchema) -> None:
+        """All dimensions, the non-dim coordinate, trace_mask and data variable are created."""
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4))
+        variables = _vars_by_name(dataset)
+
+        assert {"inline", "crossline", "time", "cdp_x", "trace_mask", "amplitude"}.issubset(variables)
+        assert _dim_names(variables["amplitude"]) == ("inline", "crossline", "time")
+        assert _dim_names(variables["trace_mask"]) == ("inline", "crossline")
+        assert _dim_names(variables["cdp_x"]) == ("inline", "crossline")
+
+    def test_resolves_negative_one_chunk_against_sizes(self, basic_schema: ResolvedSchema) -> None:
+        """A ``-1`` chunk entry is replaced by the actual dimension size."""
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4))
+        amplitude = _vars_by_name(dataset)["amplitude"]
+        # chunk_shape (2, -1, 4) with sizes (2, 3, 4) -> (2, 3, 4)
+        assert _chunk_shape(amplitude) == (2, 3, 4)
+
+    def test_default_variable_name_recorded_in_attributes(self, basic_schema: ResolvedSchema) -> None:
+        """The default variable name is written to dataset attributes."""
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4))
+        assert dataset.metadata.attributes["defaultVariableName"] == "amplitude"
+
+    def test_header_dtype_adds_headers_variable(self, basic_schema: ResolvedSchema) -> None:
+        """Passing a header dtype adds a ``headers`` variable over the spatial dims only."""
+        header_dtype = to_structured_type(np.dtype([("cdp_x", "int32"), ("cdp_y", "int32")]))
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4), header_dtype=header_dtype)
+
+        headers = _vars_by_name(dataset)["headers"]
+        assert _dim_names(headers) == ("inline", "crossline")
+        # Headers chunk drops the vertical axis.
+        assert _chunk_shape(headers) == (2, 3)
+
+    def test_no_headers_variable_when_dtype_missing(self, basic_schema: ResolvedSchema) -> None:
+        """Without a header dtype, no ``headers`` variable is created."""
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4))
+        assert "headers" not in _vars_by_name(dataset)
+
+    def test_calculated_dimension_has_no_dimension_coordinate(self) -> None:
+        """A calculated spatial dim (e.g. ``shot_index``) gets no dimension coordinate."""
+        schema = ResolvedSchema(
+            name="Calc",
+            dimensions=[
+                DimensionSpec(name="receiver", is_spatial=True, dtype=ScalarType.UINT32),
+                DimensionSpec(name="shot_index", is_spatial=True, is_calculated=True, dtype=ScalarType.INT32),
+                DimensionSpec(name="time", is_spatial=False, dtype=ScalarType.INT16),
+            ],
+            coordinates=[],
+            chunk_shape=(2, 2, 4),
+        )
+        dataset = build_mdio_dataset(schema=schema, sizes=(2, 2, 4))
+        variables = _vars_by_name(dataset)
+
+        # shot_index is still a data-variable dimension, but not its own 1-D coordinate.
+        assert _dim_names(variables["amplitude"]) == ("receiver", "shot_index", "time")
+        assert "shot_index" not in variables
+        assert "receiver" in variables
+
+    def test_extra_variables_are_appended(self, basic_schema: ResolvedSchema) -> None:
+        """Extra variable dicts (e.g. raw_headers) are added to the dataset."""
+        extra = [
+            {
+                "name": "raw_headers",
+                "dimensions": ("inline", "crossline"),
+                "data_type": ScalarType.BYTES240,
+            }
+        ]
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4), extra_variables=extra)
+        assert "raw_headers" in _vars_by_name(dataset)
+
+    def test_units_attached_to_coordinate(self, basic_schema: ResolvedSchema) -> None:
+        """Units passed by name are attached to the matching coordinate metadata."""
+        meter = LengthUnitModel(length=LengthUnitEnum.METER)
+        dataset = build_mdio_dataset(schema=basic_schema, sizes=(2, 3, 4), units={"cdp_x": meter})
+        cdp_x = _vars_by_name(dataset)["cdp_x"]
+        assert cdp_x.metadata.units_v1 == meter
diff --git a/tests/unit/ingestion/test_metadata.py b/tests/unit/ingestion/test_metadata.py
index 2ba7d4c8..3ef7a928 100644
--- a/tests/unit/ingestion/test_metadata.py
+++ b/tests/unit/ingestion/test_metadata.py
@@ -4,7 +4,7 @@
 
 from types import SimpleNamespace
 
-from mdio.ingestion.metadata import _add_grid_override_to_metadata
+from mdio.ingestion.metadata import add_grid_override_to_metadata
 from mdio.segy.geometry import GridOverrides
 
 
@@ -14,19 +14,19 @@ def _make_dataset(attributes: dict | None) -> SimpleNamespace:
 
 
 class TestAddGridOverrideToMetadata:
-    """Tests for ``_add_grid_override_to_metadata``."""
+    """Tests for ``add_grid_override_to_metadata``."""
 
     def test_initializes_attributes_dict_when_none(self) -> None:
         """A ``None`` attributes dict gets replaced with an empty dict before insertion."""
         dataset = _make_dataset(attributes=None)
-        _add_grid_override_to_metadata(dataset, grid_overrides=None)
+        add_grid_override_to_metadata(dataset, grid_overrides=None)
         assert dataset.metadata.attributes == {}
 
     def test_adds_grid_overrides_when_provided(self) -> None:
         """Active grid overrides should serialize under the ``gridOverrides`` key."""
         dataset = _make_dataset(attributes=None)
         overrides = GridOverrides(has_duplicates=True, chunksize=4)
-        _add_grid_override_to_metadata(dataset, grid_overrides=overrides)
+        add_grid_override_to_metadata(dataset, grid_overrides=overrides)
         assert dataset.metadata.attributes == {
             "gridOverrides": {"HasDuplicates": True, "chunksize": 4},
         }
@@ -35,7 +35,7 @@ def test_preserves_existing_attributes(self) -> None:
         """Existing attribute keys should be preserved when adding overrides."""
         dataset = _make_dataset(attributes={"existing": "value"})
         overrides = GridOverrides(non_binned=True)
-        _add_grid_override_to_metadata(dataset, grid_overrides=overrides)
+        add_grid_override_to_metadata(dataset, grid_overrides=overrides)
         assert dataset.metadata.attributes == {
             "existing": "value",
             "gridOverrides": {"NonBinned": True},
@@ -44,5 +44,5 @@ def test_preserves_existing_attributes(self) -> None:
     def test_no_overrides_leaves_attributes_untouched(self) -> None:
         """Passing ``None`` overrides must not introduce a ``gridOverrides`` key."""
         dataset = _make_dataset(attributes={"existing": "value"})
-        _add_grid_override_to_metadata(dataset, grid_overrides=None)
+        add_grid_override_to_metadata(dataset, grid_overrides=None)
         assert dataset.metadata.attributes == {"existing": "value"}
diff --git a/tests/unit/ingestion/test_no_template_mutation.py b/tests/unit/ingestion/test_no_template_mutation.py
new file mode 100644
index 00000000..ce634764
--- /dev/null
+++ b/tests/unit/ingestion/test_no_template_mutation.py
@@ -0,0 +1,36 @@
+"""Guardrail: ingestion must never mutate template internals.
+
+The pre-v1.2 ingestion path mutated templates in place (reassigning ``_dim_names``,
+``_var_chunk_shape``, ``_logical_coord_names``) and even monkeypatched template methods at
+runtime. The refactor replaced that with an immutable ``ResolvedSchema``. This test fails
+loudly if any ingestion module reintroduces template-internals mutation or method rebinding.
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+import mdio
+
+# Assignment to a private template attribute, e.g. ``template._dim_names = ...`` or
+# rebinding a method/attribute on a template object, e.g. ``template._add_coordinates = ...``.
+_MUTATION = re.compile(r"\b(?:template|mdio_template)\.\w+\s*=(?!=)")
+
+_INGESTION_ROOT = Path(mdio.__file__).parent / "ingestion"
+
+
+def _ingestion_sources() -> list[Path]:
+    return sorted(p for p in _INGESTION_ROOT.rglob("*.py") if "__pycache__" not in p.parts)
+
+
+def test_ingestion_never_mutates_template() -> None:
+    """No ingestion module assigns to a template attribute (mutation/monkeypatch guard)."""
+    offenders: list[str] = []
+    for path in _ingestion_sources():
+        for lineno, line in enumerate(path.read_text().splitlines(), start=1):
+            code = line.split("#", 1)[0]
+            if _MUTATION.search(code):
+                offenders.append(f"{path.name}:{lineno}: {line.strip()}")
+
+    assert not offenders, "Ingestion must not mutate template internals:\n" + "\n".join(offenders)
diff --git a/tests/unit/ingestion/test_schema_effects.py b/tests/unit/ingestion/test_schema_effects.py
new file mode 100644
index 00000000..5bd4a224
--- /dev/null
+++ b/tests/unit/ingestion/test_schema_effects.py
@@ -0,0 +1,112 @@
+"""Unit tests for grid-override schema effects and their registry selection."""
+
+from __future__ import annotations
+
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.templates.types import CoordinateSpec
+from mdio.ingestion.schema import CollapseToTraceEffect
+from mdio.ingestion.schema import DimensionSpec
+from mdio.ingestion.schema import InsertTraceDimEffect
+from mdio.ingestion.schema import ResolvedSchema
+from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
+from mdio.segy.geometry import GridOverrides
+
+
+def _schema() -> ResolvedSchema:
+    """A 3-spatial-dim + vertical schema with a coordinate spanning two spatial dims."""
+    return ResolvedSchema(
+        name="Toy",
+        dimensions=[
+            DimensionSpec(name="shot_point", is_spatial=True, dtype=ScalarType.UINT32),
+            DimensionSpec(name="cable", is_spatial=True, dtype=ScalarType.UINT8),
+            DimensionSpec(name="channel", is_spatial=True, dtype=ScalarType.UINT16),
+            DimensionSpec(name="time", is_spatial=False, dtype=ScalarType.INT32),
+        ],
+        coordinates=[
+            CoordinateSpec(
+                name="group_coord_x",
+                dimensions=("shot_point", "cable", "channel"),
+                dtype=ScalarType.FLOAT64,
+            ),
+        ],
+        chunk_shape=(8, 1, 128, 2048),
+    )
+
+
+class TestRegistrySchemaEffectSelection:
+    """The registry maps GridOverrides to the matching SchemaEffect (single source)."""
+
+    def test_no_overrides_returns_none(self) -> None:
+        """A None or empty override yields no schema effect."""
+        registry = IndexStrategyRegistry()
+        assert registry.schema_effect(None) is None
+        assert registry.schema_effect(GridOverrides()) is None
+
+    def test_header_only_overrides_return_none(self) -> None:
+        """Overrides that only transform headers (no layout change) yield no effect."""
+        registry = IndexStrategyRegistry()
+        assert registry.schema_effect(GridOverrides(auto_channel_wrap=True)) is None
+
+    def test_non_binned_default_collapse(self) -> None:
+        """NonBinned without explicit dims yields a default (None) CollapseToTraceEffect."""
+        effect = IndexStrategyRegistry().schema_effect(GridOverrides(non_binned=True, chunksize=64))
+        assert isinstance(effect, CollapseToTraceEffect)
+        assert effect.chunksize == 64
+        assert effect.collapse_dims is None
+
+    def test_non_binned_explicit_dims(self) -> None:
+        """NonBinned with explicit dims preserves them on the effect."""
+        overrides = GridOverrides(non_binned=True, chunksize=128, non_binned_dims=["channel"])
+        effect = IndexStrategyRegistry().schema_effect(overrides)
+        assert isinstance(effect, CollapseToTraceEffect)
+        assert effect.collapse_dims == ("channel",)
+
+    def test_has_duplicates_inserts_trace(self) -> None:
+        """HasDuplicates yields a 1-wide InsertTraceDimEffect."""
+        effect = IndexStrategyRegistry().schema_effect(GridOverrides(has_duplicates=True))
+        assert isinstance(effect, InsertTraceDimEffect)
+        assert effect.chunksize == 1
+
+
+class TestInsertTraceDimEffect:
+    """InsertTraceDimEffect adds a calculated trace dim before the vertical axis."""
+
+    def test_inserts_trace_before_vertical(self) -> None:
+        """A 1-wide calculated trace dim is inserted; coordinates are untouched."""
+        result = InsertTraceDimEffect(chunksize=1).apply(_schema())
+        assert [d.name for d in result.dimensions] == ["shot_point", "cable", "channel", "trace", "time"]
+        assert result.chunk_shape == (8, 1, 128, 1, 2048)
+        trace = next(d for d in result.dimensions if d.name == "trace")
+        assert trace.is_calculated is True
+        # Coordinates are unchanged by duplicate handling.
+        assert result.coordinates[0].dimensions == ("shot_point", "cable", "channel")
+
+
+class TestCollapseToTraceEffect:
+    """CollapseToTraceEffect collapses spatial dims into a single trace dim."""
+
+    def test_default_collapses_all_but_first(self) -> None:
+        """A None collapse set keeps the first spatial dim and collapses the rest."""
+        result = CollapseToTraceEffect(chunksize=64, collapse_dims=None).apply(_schema())
+        assert [d.name for d in result.dimensions] == ["shot_point", "trace", "time"]
+        assert result.chunk_shape == (8, 64, 2048)
+
+    def test_explicit_collapse_dims(self) -> None:
+        """Only the named dims collapse; the coordinate is rewritten onto trace."""
+        result = CollapseToTraceEffect(chunksize=128, collapse_dims=("channel",)).apply(_schema())
+        assert [d.name for d in result.dimensions] == ["shot_point", "cable", "trace", "time"]
+        group_coord_x = next(c for c in result.coordinates if c.name == "group_coord_x")
+        assert group_coord_x.dimensions == ("shot_point", "cable", "trace")
+
+    def test_collapsed_dim_becomes_trace_coordinate(self) -> None:
+        """A collapsed dimension is re-expressed as a coordinate over trace with its dtype."""
+        result = CollapseToTraceEffect(chunksize=64, collapse_dims=("channel",)).apply(_schema())
+        channel = next(c for c in result.coordinates if c.name == "channel")
+        assert channel.dimensions == ("trace",)
+        assert channel.dtype == ScalarType.UINT16
+
+    def test_empty_collapse_is_noop_for_dimensions(self) -> None:
+        """An empty collapse set inserts no trace dim and keeps the original layout."""
+        result = CollapseToTraceEffect(chunksize=64, collapse_dims=()).apply(_schema())
+        assert [d.name for d in result.dimensions] == ["shot_point", "cable", "channel", "time"]
+        assert result.chunk_shape == (8, 1, 128, 2048)
diff --git a/tests/unit/test_ingestion_schema_resolver.py b/tests/unit/ingestion/test_schema_resolver.py
similarity index 73%
rename from tests/unit/test_ingestion_schema_resolver.py
rename to tests/unit/ingestion/test_schema_resolver.py
index 365b3df2..64fc469a 100644
--- a/tests/unit/test_ingestion_schema_resolver.py
+++ b/tests/unit/ingestion/test_schema_resolver.py
@@ -5,7 +5,9 @@
 from mdio.builder.templates.seismic_3d_cdp import Seismic3DCdpGathersTemplate
 from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate
 from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate
-from mdio.ingestion.schema_resolver import SchemaResolver
+from mdio.ingestion.schema import DimensionSpec
+from mdio.ingestion.schema import ResolvedSchema
+from mdio.ingestion.schema.resolver import SchemaResolver
 from mdio.segy.geometry import GridOverrides
 
 
@@ -77,13 +79,12 @@ def test_coordinate_dimensions_collapsed_when_referenced(self) -> None:
         group_coord_x = next(c for c in schema.coordinates if c.name == "group_coord_x")
         assert group_coord_x.dimensions == ("shot_point", "trace")
 
-    def test_non_binned_flag_recorded_in_metadata(self) -> None:
-        """The NonBinned flag is recorded under ``gridOverrides`` metadata."""
+    def test_grid_override_provenance_not_in_schema_metadata(self) -> None:
+        """The resolver is mechanics-only: override provenance is attached at the dataset level."""
         template = Seismic3DStreamerShotGathersTemplate(data_domain="time")
         overrides = GridOverrides(non_binned=True, chunksize=64)
         schema = SchemaResolver().resolve(template, overrides)
-        assert "gridOverrides" in schema.metadata
-        assert schema.metadata["gridOverrides"].get("NonBinned") is True
+        assert "gridOverrides" not in schema.metadata
 
 
 class TestSchemaResolverHasDuplicates:
@@ -100,8 +101,40 @@ def test_inserts_trace_dim_with_chunksize_one(self) -> None:
         # before the vertical dim.
         assert schema.chunk_shape == (8, 1, 128, 1, 2048)
 
-    def test_has_duplicates_metadata(self) -> None:
-        """The HasDuplicates flag is recorded under ``gridOverrides`` metadata."""
+    def test_has_duplicates_provenance_not_in_schema_metadata(self) -> None:
+        """The resolver does not record override provenance in schema metadata."""
         template = Seismic3DStreamerShotGathersTemplate(data_domain="time")
         schema = SchemaResolver().resolve(template, GridOverrides(has_duplicates=True))
-        assert schema.metadata["gridOverrides"].get("HasDuplicates") is True
+        assert "gridOverrides" not in schema.metadata
+
+
+class TestMissingCalculatedDimensions:
+    """Tests for ``ResolvedSchema.missing_calculated_dimensions``."""
+
+    def _schema(self) -> ResolvedSchema:
+        return ResolvedSchema(
+            name="Calc",
+            dimensions=[
+                DimensionSpec(name="receiver", is_spatial=True),
+                DimensionSpec(name="shot_index", is_spatial=True, is_calculated=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ],
+            coordinates=[],
+            chunk_shape=(2, 2, 4),
+        )
+
+    def test_reports_missing_calculated_dim(self) -> None:
+        """A calculated dim absent from produced names is reported."""
+        schema = self._schema()
+        assert schema.missing_calculated_dimensions(["receiver", "time"]) == ["shot_index"]
+
+    def test_empty_when_calculated_dim_produced(self) -> None:
+        """Nothing is missing once the calculated dim is produced."""
+        schema = self._schema()
+        assert schema.missing_calculated_dimensions(["receiver", "shot_index", "time"]) == []
+
+    def test_non_calculated_dims_are_never_reported(self) -> None:
+        """A missing non-calculated spatial dim is not flagged by this check."""
+        schema = self._schema()
+        # 'receiver' is read from headers, not calculated, so its absence is not reported here.
+        assert schema.missing_calculated_dimensions(["time"]) == ["shot_index"]
diff --git a/tests/unit/ingestion/test_segy_coordinates.py b/tests/unit/ingestion/test_segy_coordinates.py
index 84962986..6a52d3f5 100644
--- a/tests/unit/ingestion/test_segy_coordinates.py
+++ b/tests/unit/ingestion/test_segy_coordinates.py
@@ -16,45 +16,14 @@
 from mdio.builder.schemas.v1.units import LengthUnitEnum
 from mdio.builder.schemas.v1.units import LengthUnitModel
 from mdio.builder.templates.base import AbstractDatasetTemplate
-from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate
-from mdio.ingestion.segy.coordinates import _get_coordinates
-from mdio.ingestion.segy.coordinates import _get_spatial_coordinate_unit
-from mdio.ingestion.segy.coordinates import _populate_coordinates
-from mdio.ingestion.segy.coordinates import _update_template_units
-from tests.unit.ingestion.testing_helpers import make_grid
+from mdio.ingestion.segy.coordinates import get_spatial_coordinate_unit
+from mdio.ingestion.segy.coordinates import populate_coordinates
+from mdio.ingestion.segy.coordinates import resolve_units
 from tests.unit.ingestion.testing_helpers import make_grid_with_map
-from tests.unit.ingestion.testing_helpers import make_header_array
-
-
-class TestGetCoordinates:
-    """Tests for ``_get_coordinates``."""
-
-    def test_returns_dims_and_coords_in_template_order(self) -> None:
-        """Dim coords and non-dim coords should follow the template's declared order."""
-        inline = np.array([1, 2, 3], dtype=np.int32)
-        crossline = np.array([10, 20], dtype=np.int32)
-        sample = np.array([0, 4, 8, 12], dtype=np.int32)
-        grid = make_grid([("inline", inline), ("crossline", crossline), ("time", sample)])
-
-        n = inline.size * crossline.size
-        cdp_x = np.arange(n, dtype=np.float64)
-        cdp_y = np.arange(n, dtype=np.float64) + 100.0
-        headers = make_header_array({"cdp_x": cdp_x, "cdp_y": cdp_y})
-
-        template = Seismic3DPostStackTemplate(data_domain="time")
-
-        dim_coords, non_dim = _get_coordinates(grid, headers, template)
-
-        assert [d.name for d in dim_coords] == ["inline", "crossline", "time"]
-        np.testing.assert_array_equal(dim_coords[0].coords, inline)
-        np.testing.assert_array_equal(dim_coords[1].coords, crossline)
-        assert list(non_dim.keys()) == ["cdp_x", "cdp_y"]
-        np.testing.assert_array_equal(non_dim["cdp_x"], cdp_x)
-        np.testing.assert_array_equal(non_dim["cdp_y"], cdp_y)
 
 
 class TestPopulateCoordinates:
-    """Tests for the ``_populate_coordinates`` wrapper.
+    """Tests for the ``populate_coordinates`` wrapper.
 
     These pin the contract that wraps ``populate_dim_coordinates`` +
     ``populate_non_dim_coordinates``: dim names land in ``drop_vars`` before coord
@@ -86,7 +55,7 @@ def test_wraps_dim_and_non_dim_population_in_order(self) -> None:
             }
         )
 
-        dataset, drop_vars = _populate_coordinates(
+        dataset, drop_vars = populate_coordinates(
             dataset,
             grid,
             coords={"cdp_x": cdp_x},
@@ -102,7 +71,7 @@ def test_wraps_dim_and_non_dim_population_in_order(self) -> None:
 
 
 class TestGetSpatialCoordinateUnit:
-    """Tests for ``_get_spatial_coordinate_unit``."""
+    """Tests for ``get_spatial_coordinate_unit``."""
 
     @pytest.mark.parametrize(
         ("code", "expected_unit"),
@@ -114,7 +83,7 @@ class TestGetSpatialCoordinateUnit:
     def test_known_measurement_codes(self, code: int, expected_unit: LengthUnitEnum) -> None:
         """Codes 1 (m) and 2 (ft) return the corresponding length unit."""
         info = SimpleNamespace(binary_header_dict={"measurement_system_code": code})
-        result = _get_spatial_coordinate_unit(info)
+        result = get_spatial_coordinate_unit(info)
         assert isinstance(result, LengthUnitModel)
         assert result.length == expected_unit
 
@@ -122,40 +91,36 @@ def test_unknown_code_returns_none_and_warns(self, caplog: pytest.LogCaptureFixt
         """Unexpected codes should log a warning and return ``None``."""
         info = SimpleNamespace(binary_header_dict={"measurement_system_code": 7})
         with caplog.at_level(logging.WARNING, logger="mdio.ingestion.segy.coordinates"):
-            result = _get_spatial_coordinate_unit(info)
+            result = get_spatial_coordinate_unit(info)
         assert result is None
         assert any("Unexpected value in coordinate unit" in r.message for r in caplog.records)
 
 
-class TestUpdateTemplateUnits:
-    """Tests for ``_update_template_units``."""
+class TestResolveUnits:
+    """Tests for ``resolve_units``."""
 
     def _stub_template(self) -> MagicMock:
         template = MagicMock(spec=AbstractDatasetTemplate)
-        template.get_unit_by_key.return_value = None
+        template.units = {}
         return template
 
     def test_adds_angle_units_only_when_spatial_unit_missing(self) -> None:
         """Without a spatial unit, only angle units should be added."""
         template = self._stub_template()
-        result = _update_template_units(template, unit=None)
+        result = resolve_units(template, unit=None)
 
-        template.add_units.assert_called_once()
-        added = template.add_units.call_args.args[0]
-        assert set(added.keys()) == {"angle", "azimuth"}
-        for unit in added.values():
+        assert set(result.keys()) == {"angle", "azimuth"}
+        for unit in result.values():
             assert isinstance(unit, AngleUnitModel)
             assert unit.angle == AngleUnitEnum.DEGREES
-        assert result is template
 
     def test_adds_spatial_units_when_unit_provided(self) -> None:
         """A non-None unit should populate all SPATIAL keys plus angle keys."""
         template = self._stub_template()
         unit = LengthUnitModel(length=LengthUnitEnum.METER)
 
-        _update_template_units(template, unit=unit)
+        result = resolve_units(template, unit=unit)
 
-        added = template.add_units.call_args.args[0]
         expected_keys = {
             "angle",
             "azimuth",
@@ -167,9 +132,9 @@ def test_adds_spatial_units_when_unit_provided(self) -> None:
             "group_coord_y",
             "offset",
         }
-        assert set(added.keys()) == expected_keys
+        assert set(result.keys()) == expected_keys
         for key in ("cdp_x", "cdp_y", "source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y", "offset"):
-            assert added[key] is unit
+            assert result[key] is unit
 
     def test_preserves_pre_existing_spatial_units(self, caplog: pytest.LogCaptureFixture) -> None:
         """Keys that already have a template unit must not be overwritten."""
@@ -177,16 +142,11 @@ def test_preserves_pre_existing_spatial_units(self, caplog: pytest.LogCaptureFix
         new_unit = LengthUnitModel(length=LengthUnitEnum.METER)
 
         template = MagicMock(spec=AbstractDatasetTemplate)
-
-        def fake_lookup(key: str) -> LengthUnitModel | None:
-            return existing if key == "cdp_x" else None
-
-        template.get_unit_by_key.side_effect = fake_lookup
+        template.units = {"cdp_x": existing}
 
         with caplog.at_level(logging.WARNING, logger="mdio.ingestion.segy.coordinates"):
-            _update_template_units(template, unit=new_unit)
+            result = resolve_units(template, unit=new_unit)
 
-        added = template.add_units.call_args.args[0]
-        assert "cdp_x" not in added
-        assert added["cdp_y"] is new_unit
+        assert result["cdp_x"] is existing
+        assert result["cdp_y"] is new_unit
         assert any("already in template" in r.message for r in caplog.records)
diff --git a/tests/unit/ingestion/test_segy_file_headers.py b/tests/unit/ingestion/test_segy_file_headers.py
index 39db2339..97480060 100644
--- a/tests/unit/ingestion/test_segy_file_headers.py
+++ b/tests/unit/ingestion/test_segy_file_headers.py
@@ -12,7 +12,7 @@
 from xarray import DataArray as xr_DataArray
 from xarray import Dataset as xr_Dataset
 
-from mdio.ingestion.segy.file_headers import _add_segy_file_headers
+from mdio.ingestion.segy.file_headers import add_segy_file_headers
 
 
 def _valid_text_header() -> str:
@@ -37,14 +37,14 @@ def _empty_dataset() -> xr_Dataset:
 
 
 class TestAddSegyFileHeaders:
-    """Tests for ``_add_segy_file_headers``."""
+    """Tests for ``add_segy_file_headers``."""
 
     def test_disabled_returns_dataset_unchanged(self) -> None:
         """When the save flag is off the dataset must not be modified."""
         info = _make_segy_info()
         ds = _empty_dataset()
         with patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "false"}):
-            result = _add_segy_file_headers(ds, info)
+            result = add_segy_file_headers(ds, info)
 
         assert "segy_file_header" not in result
 
@@ -53,7 +53,7 @@ def test_attaches_headers_when_enabled(self) -> None:
         info = _make_segy_info()
         ds = _empty_dataset()
         with patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "true"}):
-            result = _add_segy_file_headers(ds, info)
+            result = add_segy_file_headers(ds, info)
 
         attrs = result["segy_file_header"].attrs
         assert attrs["textHeader"] == info.text_header
@@ -69,7 +69,7 @@ def test_attaches_raw_binary_when_raw_flag_enabled(self) -> None:
             "MDIO__IMPORT__RAW_HEADERS": "true",
         }
         with patch.dict(os.environ, env):
-            result = _add_segy_file_headers(ds, info)
+            result = add_segy_file_headers(ds, info)
 
         encoded = base64.b64encode(b"abc").decode("ascii")
         assert result["segy_file_header"].attrs["rawBinaryHeader"] == encoded
@@ -83,7 +83,7 @@ def test_invalid_row_count_raises(self) -> None:
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "true"}),
             pytest.raises(ValueError, match="Invalid text header line count"),
         ):
-            _add_segy_file_headers(ds, info)
+            add_segy_file_headers(ds, info)
 
     def test_invalid_column_count_raises(self) -> None:
         """Text header rows shorter than 80 chars must raise."""
@@ -95,4 +95,4 @@ def test_invalid_column_count_raises(self) -> None:
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "true"}),
             pytest.raises(ValueError, match="Invalid text header line widths"),
         ):
-            _add_segy_file_headers(ds, info)
+            add_segy_file_headers(ds, info)
diff --git a/tests/unit/test_segy_grid_overrides.py b/tests/unit/ingestion/test_segy_grid_overrides.py
similarity index 74%
rename from tests/unit/test_segy_grid_overrides.py
rename to tests/unit/ingestion/test_segy_grid_overrides.py
index b7c862c8..34055fcc 100644
--- a/tests/unit/test_segy_grid_overrides.py
+++ b/tests/unit/ingestion/test_segy_grid_overrides.py
@@ -15,8 +15,12 @@
 from numpy.testing import assert_array_equal
 
 from mdio.core import Dimension
+from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
+from mdio.segy.exceptions import GridOverrideMissingParameterError
 from mdio.segy.exceptions import GridOverrideUnknownError
-from mdio.segy.geometry import GridOverrider
+from mdio.segy.geometry import GridOverrides
+from mdio.segy.geometry import _resolve_synthesize_dims
+from mdio.segy.geometry import _validate_template_for_overrides
 
 if TYPE_CHECKING:
     from mdio.builder.templates.base import AbstractDatasetTemplate
@@ -32,10 +36,58 @@ def run_override(
     headers: npt.NDArray,
     chunksize: tuple[int, ...] | None = None,
     template: AbstractDatasetTemplate | None = None,
-) -> tuple[dict[str, Any], tuple[str], tuple[int]]:
-    """Initialize and run overrider."""
-    overrider = GridOverrider()
-    return overrider.run(headers, index_names, grid_overrides, chunksize, template=template)
+) -> tuple[npt.NDArray, tuple[str, ...], tuple[int, ...] | None]:
+    """Initialize and run overrider using IndexStrategyRegistry."""
+    grid_overrides = grid_overrides or {}
+
+    field_names = set(GridOverrides.model_fields.keys())
+    aliases = {field.alias for field in GridOverrides.model_fields.values() if field.alias}
+    valid_keys = field_names | aliases
+    for key in grid_overrides:
+        if key not in valid_keys:
+            raise GridOverrideUnknownError(key)
+
+    config = GridOverrides.model_validate(grid_overrides)
+
+    if config.non_binned:
+        missing: set[str] = set()
+        if config.chunksize is None:
+            missing.add("chunksize")
+        if not config.non_binned_dims:
+            missing.add("non_binned_dims")
+        if missing:
+            command = "NonBinned"
+            raise GridOverrideMissingParameterError(command, missing)
+
+    _validate_template_for_overrides(config, template)
+
+    synthesize_dims = _resolve_synthesize_dims(template)
+    registry = IndexStrategyRegistry()
+    strategy = registry.create_strategy(
+        grid_overrides=config,
+        synthesize_dims=synthesize_dims,
+        template=template,
+    )
+
+    strategy.validate_headers(headers)
+    new_headers = strategy.transform_headers(headers)
+
+    new_names = list(index_names)
+    new_chunks = list(chunksize) if chunksize is not None else None
+
+    # Both NonBinned and HasDuplicates add a 'trace' dim at index -1; HasDuplicates
+    # always uses chunksize 1, NonBinned uses the user-supplied value.
+    if config.non_binned or config.has_duplicates:
+        new_names.append("trace")
+        if new_chunks is not None:
+            inserted_chunk = config.chunksize if config.non_binned else 1
+            new_chunks.insert(-1, inserted_chunk)
+
+    return (
+        new_headers,
+        tuple(new_names),
+        tuple(new_chunks) if new_chunks is not None else None,
+    )
 
 
 def get_dims(headers: npt.NDArray) -> list[Dimension]:
@@ -143,9 +195,8 @@ def test_unknown_override(
         """Test exception if user provides a command that's not allowed."""
         index_names = ("shot_point", "cable", "channel")
         chunksize = None
-        overrider = GridOverrider()
         with pytest.raises(GridOverrideUnknownError):
-            overrider.run(mock_streamer_headers, index_names, {"WrongCommand": True}, chunksize)
+            run_override({"WrongCommand": True}, index_names, mock_streamer_headers, chunksize)
 
 
 # OBN test fixtures and tests
@@ -159,9 +210,10 @@ class TestObnGridOverrides:
     """Check grid overrides for OBN (Ocean Bottom Node) data.
 
     Note: The synthetic component behavior (when SEG-Y spec doesn't have a component field)
-    is handled by the template's internal `_synthetic_defaults` attribute in `utilities.get_grid_plan()`,
-    not by a grid override. See integration test `test_import_obn_synthetic_component` for
-    full flow coverage. These unit tests focus on grid override functionality.
+    is handled by ``ComponentSynthesisStrategy`` (driven by the template's
+    ``synthesize_missing_dims``) during header reading, not by a grid override. See integration
+    test `test_import_obn_synthetic_component` for full flow coverage. These unit tests focus on
+    grid override functionality.
     """
 
     def test_calculate_shot_index_obn(self) -> None:
diff --git a/tests/unit/test_ingestion_index_strategies.py b/tests/unit/ingestion/test_segy_index_strategies.py
similarity index 93%
rename from tests/unit/test_ingestion_index_strategies.py
rename to tests/unit/ingestion/test_segy_index_strategies.py
index 0cac710c..70384ed3 100644
--- a/tests/unit/test_ingestion_index_strategies.py
+++ b/tests/unit/ingestion/test_segy_index_strategies.py
@@ -1,6 +1,6 @@
 """Unit tests for the v1.2 ingestion index strategies and the strategy registry.
 
-These tests exercise individual :class:`mdio.ingestion.index_strategies.IndexStrategy`
+These tests exercise individual :class:`mdio.ingestion.segy.index_strategies.IndexStrategy`
 subclasses with synthetic structured numpy arrays (mimicking the shape semantics of
 :class:`segy.arrays.HeaderArray`) so they remain fast and do not require any real SEG-Y
 data.
@@ -8,21 +8,52 @@
 
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+from typing import Any
+
 import numpy as np
 import pytest
 
 from mdio.builder.template_registry import TemplateRegistry
-from mdio.ingestion.index_strategies import ChannelWrappingStrategy
-from mdio.ingestion.index_strategies import ComponentSynthesisStrategy
-from mdio.ingestion.index_strategies import CompositeStrategy
-from mdio.ingestion.index_strategies import DuplicateHandlingStrategy
-from mdio.ingestion.index_strategies import IndexStrategyRegistry
-from mdio.ingestion.index_strategies import NonBinnedStrategy
-from mdio.ingestion.index_strategies import RegularGridStrategy
-from mdio.ingestion.index_strategies import ShotWrappingStrategy
+from mdio.ingestion.segy.index_strategies import ChannelWrappingStrategy
+
+if TYPE_CHECKING:
+    from mdio.builder.templates.base import AbstractDatasetTemplate
+from mdio.ingestion.segy.index_strategies import ComponentSynthesisStrategy
+from mdio.ingestion.segy.index_strategies import CompositeStrategy
+from mdio.ingestion.segy.index_strategies import DuplicateHandlingStrategy
+from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
+from mdio.ingestion.segy.index_strategies import NonBinnedStrategy
+from mdio.ingestion.segy.index_strategies import RegularGridStrategy
+from mdio.ingestion.segy.index_strategies import ShotWrappingStrategy
 from mdio.segy.exceptions import GridOverrideKeysError
-from mdio.segy.geometry import GridOverrider
 from mdio.segy.geometry import GridOverrides
+from mdio.segy.geometry import _resolve_synthesize_dims
+from mdio.segy.geometry import _validate_template_for_overrides
+
+
+class GridOverrider:
+    """Mock GridOverrider shim for template and keys validation tests."""
+
+    def run(
+        self,
+        headers: np.ndarray,
+        index_names: tuple[str, ...],  # noqa: ARG002
+        grid_overrides: dict[str, Any],
+        template: AbstractDatasetTemplate | None,
+    ) -> np.ndarray:
+        """Run mock strategy validation and transform."""
+        config = GridOverrides.model_validate(grid_overrides)
+        _validate_template_for_overrides(config, template)
+        synthesize_dims = _resolve_synthesize_dims(template)
+        registry = IndexStrategyRegistry()
+        strategy = registry.create_strategy(
+            grid_overrides=config,
+            synthesize_dims=synthesize_dims,
+            template=template,
+        )
+        strategy.validate_headers(headers)
+        return strategy.transform_headers(headers)
 
 
 def _make_struct(data: dict[str, np.ndarray]) -> np.ndarray:
diff --git a/tests/unit/ingestion/test_segy_pipeline.py b/tests/unit/ingestion/test_segy_pipeline.py
new file mode 100644
index 00000000..296887e7
--- /dev/null
+++ b/tests/unit/ingestion/test_segy_pipeline.py
@@ -0,0 +1,90 @@
+"""Unit tests for the slim ingestion pipeline helpers."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from mdio.ingestion.schema import DimensionSpec
+from mdio.ingestion.schema import ResolvedSchema
+from mdio.ingestion.segy import pipeline
+from mdio.ingestion.segy.raw_headers import build_raw_header_variables
+
+
+def _schema(dimensions: list[DimensionSpec]) -> ResolvedSchema:
+    return ResolvedSchema(
+        name="Toy",
+        dimensions=dimensions,
+        coordinates=[],
+        chunk_shape=tuple(2 for _ in dimensions),
+    )
+
+
+class TestResolveOutputPath:
+    """Tests for ``_resolve_output_path`` overwrite enforcement."""
+
+    def test_returns_path_when_absent(self, tmp_path) -> None:  # noqa: ANN001
+        """A non-existent location is returned normalized."""
+        target = tmp_path / "out.mdio"
+        result = pipeline._resolve_output_path(str(target), overwrite=False)
+        assert result.as_posix().endswith("out.mdio")
+
+    def test_raises_when_exists_without_overwrite(self, tmp_path) -> None:  # noqa: ANN001
+        """An existing location without overwrite raises FileExistsError."""
+        target = tmp_path / "out.mdio"
+        target.mkdir()
+        with pytest.raises(FileExistsError, match="overwrite=True"):
+            pipeline._resolve_output_path(str(target), overwrite=False)
+
+    def test_allows_existing_with_overwrite(self, tmp_path) -> None:  # noqa: ANN001
+        """An existing location with overwrite is allowed."""
+        target = tmp_path / "out.mdio"
+        target.mkdir()
+        result = pipeline._resolve_output_path(str(target), overwrite=True)
+        assert result.as_posix().endswith("out.mdio")
+
+
+class TestVerifyCalculatedDimensions:
+    """Tests for ``_verify_calculated_dimensions``."""
+
+    def test_passes_when_calculated_dim_present(self) -> None:
+        """No error when every calculated dim was produced."""
+        schema = _schema(
+            [
+                DimensionSpec(name="receiver", is_spatial=True),
+                DimensionSpec(name="shot_index", is_spatial=True, is_calculated=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ]
+        )
+        produced = [SimpleNamespace(name=n) for n in ("receiver", "shot_index", "time")]
+        pipeline._verify_calculated_dimensions(schema, produced, "Toy")
+
+    def test_raises_when_calculated_dim_missing(self) -> None:
+        """A missing calculated dim raises a descriptive ValueError."""
+        schema = _schema(
+            [
+                DimensionSpec(name="receiver", is_spatial=True),
+                DimensionSpec(name="shot_index", is_spatial=True, is_calculated=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ]
+        )
+        produced = [SimpleNamespace(name=n) for n in ("receiver", "time")]
+        with pytest.raises(ValueError, match="shot_index"):
+            pipeline._verify_calculated_dimensions(schema, produced, "Toy")
+
+
+class TestBuildRawHeaderVariables:
+    """Tests for the isolated experimental raw-headers feature."""
+
+    def test_disabled_by_default(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """With the feature flag off, no extra variables are produced."""
+        monkeypatch.delenv("MDIO__IMPORT__RAW_HEADERS", raising=False)
+        schema = _schema(
+            [
+                DimensionSpec(name="inline", is_spatial=True),
+                DimensionSpec(name="crossline", is_spatial=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ]
+        )
+        assert build_raw_header_variables(schema) == []
diff --git a/tests/unit/ingestion/test_segy_reader.py b/tests/unit/ingestion/test_segy_reader.py
new file mode 100644
index 00000000..5017902e
--- /dev/null
+++ b/tests/unit/ingestion/test_segy_reader.py
@@ -0,0 +1,136 @@
+"""Tests for ``read_index_headers``: header subset selection, strategy, dim building."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import numpy as np
+
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.templates.types import CoordinateSpec
+from mdio.ingestion.schema import DimensionSpec
+from mdio.ingestion.schema import ResolvedSchema
+from mdio.ingestion.segy import reader
+from tests.unit.ingestion.testing_helpers import make_header_array
+
+_READER = "mdio.ingestion.segy.reader"
+
+
+def _spec_with_fields(*names: str) -> SimpleNamespace:
+    """Build a fake SegySpec exposing ``spec.trace.header.fields`` with the given names."""
+    fields = [SimpleNamespace(name=name) for name in names]
+    return SimpleNamespace(trace=SimpleNamespace(header=SimpleNamespace(fields=fields)))
+
+
+def _file_info(num_traces: int, sample_labels: np.ndarray) -> SimpleNamespace:
+    """Build a fake SegyFileInfo carrying only the attributes the reader touches."""
+    return SimpleNamespace(num_traces=num_traces, sample_labels=sample_labels)
+
+
+def _schema(dimensions: list[DimensionSpec], coordinates: list[CoordinateSpec]) -> ResolvedSchema:
+    return ResolvedSchema(
+        name="ReaderToy",
+        dimensions=dimensions,
+        coordinates=coordinates,
+        chunk_shape=tuple(-1 for _ in dimensions),
+    )
+
+
+class TestReadIndexHeaders:
+    """Tests for the decomposed SEG-Y index-header reader."""
+
+    def test_regular_path_builds_spatial_and_vertical_dims(self) -> None:
+        """No overrides: spatial dims come from unique header values, vertical from samples."""
+        schema = _schema(
+            dimensions=[
+                DimensionSpec(name="inline", is_spatial=True),
+                DimensionSpec(name="crossline", is_spatial=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ],
+            coordinates=[CoordinateSpec(name="cdp_x", dimensions=("inline", "crossline"), dtype=ScalarType.FLOAT64)],
+        )
+        headers = make_header_array(
+            {
+                "inline": np.array([1, 1, 2, 2], dtype=np.int32),
+                "crossline": np.array([10, 11, 10, 11], dtype=np.int32),
+                "cdp_x": np.array([0.0, 1.0, 2.0, 3.0]),
+            }
+        )
+        segy_file_kwargs = {"spec": _spec_with_fields("inline", "crossline", "cdp_x", "coordinate_scalar")}
+
+        with patch(f"{_READER}.parse_headers", return_value=headers) as mock_parse:
+            indexed, dimensions = reader.read_index_headers(
+                segy_file_kwargs=segy_file_kwargs,
+                file_info=_file_info(4, np.array([0, 2000, 4000])),
+                schema=schema,
+                grid_overrides=None,
+                synthesize_dims=(),
+            )
+
+        # Only spec-present required fields are parsed.
+        subset = set(mock_parse.call_args.kwargs["subset"])
+        assert subset == {"inline", "crossline", "cdp_x", "coordinate_scalar"}
+
+        names = [d.name for d in dimensions]
+        assert names == ["inline", "crossline", "time"]
+        assert dimensions[0].coords.tolist() == [1, 2]
+        assert dimensions[1].coords.tolist() == [10, 11]
+        # sample labels normalized by 1000 and cast to int when integral.
+        assert dimensions[-1].coords.tolist() == [0, 2, 4]
+        assert indexed is headers
+
+    def test_subset_excludes_fields_absent_from_spec(self) -> None:
+        """Required fields not in the SEG-Y spec are dropped from the parse subset."""
+        schema = _schema(
+            dimensions=[
+                DimensionSpec(name="inline", is_spatial=True),
+                DimensionSpec(name="component", is_spatial=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ],
+            coordinates=[],
+        )
+        headers = make_header_array({"inline": np.array([1, 2], dtype=np.int32)})
+        # spec lacks 'component' on purpose.
+        segy_file_kwargs = {"spec": _spec_with_fields("inline", "coordinate_scalar")}
+
+        with patch(f"{_READER}.parse_headers", return_value=headers) as mock_parse:
+            reader.read_index_headers(
+                segy_file_kwargs=segy_file_kwargs,
+                file_info=_file_info(2, np.array([0, 1000])),
+                schema=schema,
+                grid_overrides=None,
+                synthesize_dims=(),
+            )
+
+        subset = set(mock_parse.call_args.kwargs["subset"])
+        assert "component" not in subset
+        assert subset == {"inline", "coordinate_scalar"}
+
+    def test_synthesize_dims_produces_missing_dimension(self) -> None:
+        """A synthesized dim absent from headers is created and yields a dimension."""
+        schema = _schema(
+            dimensions=[
+                DimensionSpec(name="component", is_spatial=True),
+                DimensionSpec(name="receiver", is_spatial=True),
+                DimensionSpec(name="time", is_spatial=False),
+            ],
+            coordinates=[],
+        )
+        headers = make_header_array({"receiver": np.array([5, 6, 7], dtype=np.uint32)})
+        segy_file_kwargs = {"spec": _spec_with_fields("receiver", "coordinate_scalar")}
+
+        with patch(f"{_READER}.parse_headers", return_value=headers):
+            indexed, dimensions = reader.read_index_headers(
+                segy_file_kwargs=segy_file_kwargs,
+                file_info=_file_info(3, np.array([0, 1000, 2000])),
+                schema=schema,
+                grid_overrides=None,
+                synthesize_dims=("component",),
+            )
+
+        names = [d.name for d in dimensions]
+        assert names == ["component", "receiver", "time"]
+        # ComponentSynthesisStrategy fills the missing dim with a constant 1.
+        assert "component" in indexed.dtype.names
+        assert dimensions[0].coords.tolist() == [1]
diff --git a/tests/unit/test_segy_spec_validation.py b/tests/unit/ingestion/test_segy_spec_validation.py
similarity index 91%
rename from tests/unit/test_segy_spec_validation.py
rename to tests/unit/ingestion/test_segy_spec_validation.py
index afdfeac6..10115532 100644
--- a/tests/unit/test_segy_spec_validation.py
+++ b/tests/unit/ingestion/test_segy_spec_validation.py
@@ -9,11 +9,11 @@
 from segy.standards import get_segy_standard
 
 from mdio.builder.templates.base import AbstractDatasetTemplate
-from mdio.converters.segy import _validate_spec_in_template
+from mdio.ingestion.segy.validation import validate_spec_in_template
 
 
 class TestValidateSpecInTemplate:
-    """Test cases for _validate_spec_in_template function."""
+    """Test cases for validate_spec_in_template function."""
 
     def test_validation_passes_with_all_required_fields(self) -> None:
         """Test that validation passes when all required fields are present."""
@@ -25,7 +25,7 @@ def test_validation_passes_with_all_required_fields(self) -> None:
         segy_spec = get_segy_standard(1.0)
 
         # Should not raise any exception
-        _validate_spec_in_template(segy_spec, template)
+        validate_spec_in_template(segy_spec, template)
 
     def test_validation_fails_with_missing_fields(self) -> None:
         """Test that validation fails when required fields are missing."""
@@ -44,7 +44,7 @@ def test_validation_fails_with_missing_fields(self) -> None:
 
         # Should raise ValueError listing the missing fields
         with pytest.raises(ValueError, match=r"Required fields.*not found in.*segy_spec") as exc_info:
-            _validate_spec_in_template(segy_spec, template)
+            validate_spec_in_template(segy_spec, template)
 
         error_message = str(exc_info.value)
         assert "custom_dim2" in error_message
@@ -68,7 +68,7 @@ def test_validation_fails_with_missing_coordinate_scalar(self) -> None:
 
         # Should raise ValueError for missing coordinate_scalar
         with pytest.raises(ValueError, match=r"Required fields.*not found in.*segy_spec") as exc_info:
-            _validate_spec_in_template(segy_spec, template)
+            validate_spec_in_template(segy_spec, template)
 
         error_message = str(exc_info.value)
         assert "coordinate_scalar" in error_message
diff --git a/tests/unit/ingestion/test_segy_validation.py b/tests/unit/ingestion/test_segy_validation.py
index 38f1b75f..8159ec51 100644
--- a/tests/unit/ingestion/test_segy_validation.py
+++ b/tests/unit/ingestion/test_segy_validation.py
@@ -11,7 +11,7 @@
 from mdio.builder.template_registry import TemplateRegistry
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate
-from mdio.ingestion.segy.validation import _validate_spec_in_template
+from mdio.ingestion.segy.validation import validate_spec_in_template
 
 
 class TestValidateSpecInTemplate:
@@ -25,7 +25,7 @@ def test_passes_with_all_required_fields(self) -> None:
         template.calculated_dimension_names = ()
 
         segy_spec = get_segy_standard(1.0)
-        _validate_spec_in_template(segy_spec, template)
+        validate_spec_in_template(segy_spec, template)
 
     def test_missing_fields_listed_in_error(self) -> None:
         """The error message must enumerate all missing required fields."""
@@ -40,7 +40,7 @@ def test_missing_fields_listed_in_error(self) -> None:
         spec = spec.customize(trace_header_fields=[HeaderField(name="custom_dim1", byte=189, format="int32")])
 
         with pytest.raises(ValueError, match=r"Required fields.*not found in.*segy_spec") as exc:
-            _validate_spec_in_template(spec, template)
+            validate_spec_in_template(spec, template)
 
         msg = str(exc.value)
         assert "custom_dim2" in msg
@@ -61,7 +61,7 @@ def test_missing_coordinate_scalar_raises(self) -> None:
         spec = spec.customize(trace_header_fields=kept)
 
         with pytest.raises(ValueError, match=r"coordinate_scalar"):
-            _validate_spec_in_template(spec, template)
+            validate_spec_in_template(spec, template)
 
     def test_calculated_dimensions_are_not_required(self) -> None:
         """Dimensions in ``calculated_dimension_names`` should not be required from the spec."""
@@ -72,7 +72,7 @@ def test_calculated_dimensions_are_not_required(self) -> None:
         template.calculated_dimension_names = ("calculated_only",)
 
         segy_spec = get_segy_standard(1.0)
-        _validate_spec_in_template(segy_spec, template)
+        validate_spec_in_template(segy_spec, template)
 
     def test_obn_template_excludes_component_requirement(self) -> None:
         """OBN templates synthesize ``component`` when absent → not required from spec."""
@@ -95,7 +95,7 @@ def test_obn_template_excludes_component_requirement(self) -> None:
             ]
         )
 
-        _validate_spec_in_template(spec, template)
+        validate_spec_in_template(spec, template)
 
     def test_obn_template_missing_other_required_field_still_fails(self) -> None:
         """Even with the ``component`` carve-out, other missing fields should error."""
@@ -103,4 +103,4 @@ def test_obn_template_missing_other_required_field_still_fails(self) -> None:
         spec = get_segy_standard(1.0)  # missing OBN-specific fields like 'receiver', 'shot_line', etc.
 
         with pytest.raises(ValueError, match=r"Required fields.*not found"):
-            _validate_spec_in_template(spec, template)
+            validate_spec_in_template(spec, template)
diff --git a/tests/unit/test_segy_file_header_modes.py b/tests/unit/test_segy_file_header_modes.py
index c363426e..f37eda4c 100644
--- a/tests/unit/test_segy_file_header_modes.py
+++ b/tests/unit/test_segy_file_header_modes.py
@@ -1,4 +1,4 @@
-"""Tests for ``_add_segy_file_headers`` mode handling.
+"""Tests for ``add_segy_file_headers`` mode handling.
 
 Covers the three values of ``MDIO__IMPORT__SAVE_SEGY_FILE_HEADER``:
 0 skips, 1 raises on a malformed text header, 2 corrects a malformed text header.
@@ -13,7 +13,7 @@
 import pytest
 import xarray as xr
 
-from mdio.ingestion.segy.file_headers import _add_segy_file_headers
+from mdio.ingestion.segy.file_headers import add_segy_file_headers
 from mdio.segy.file import SegyFileInfo
 
 
@@ -57,7 +57,7 @@ def test_mode_zero_skips_header_save(self) -> None:
         """Mode 0 leaves the dataset without a ``segy_file_header`` variable."""
         ds = xr.Dataset()
         with patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "0"}):
-            result = _add_segy_file_headers(ds, _segy_info(_malformed_header()))
+            result = add_segy_file_headers(ds, _segy_info(_malformed_header()))
 
         assert "segy_file_header" not in result
 
@@ -66,7 +66,7 @@ def test_mode_one_accepts_well_formed(self) -> None:
         ds = xr.Dataset()
         header = _well_formed_header()
         with patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "1"}):
-            result = _add_segy_file_headers(ds, _segy_info(header))
+            result = add_segy_file_headers(ds, _segy_info(header))
 
         assert result["segy_file_header"].attrs["textHeader"] == header
 
@@ -77,7 +77,7 @@ def test_mode_one_raises_on_malformed(self) -> None:
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "1"}),
             pytest.raises(ValueError, match="non-ASCII or non-printable"),
         ):
-            _add_segy_file_headers(ds, _segy_info(_malformed_header()))
+            add_segy_file_headers(ds, _segy_info(_malformed_header()))
 
     def test_mode_one_raises_on_replacement_char(self) -> None:
         """Mode 1 raises on U+FFFD."""
@@ -86,7 +86,7 @@ def test_mode_one_raises_on_replacement_char(self) -> None:
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "1"}),
             pytest.raises(ValueError, match="non-ASCII or non-printable"),
         ):
-            _add_segy_file_headers(ds, _segy_info(_replacement_char_header()))
+            add_segy_file_headers(ds, _segy_info(_replacement_char_header()))
 
     def test_mode_two_corrects_malformed(self, caplog: pytest.LogCaptureFixture) -> None:
         """Mode 2 repairs a NUL byte and stores a 40x80 header."""
@@ -95,7 +95,7 @@ def test_mode_two_corrects_malformed(self, caplog: pytest.LogCaptureFixture) ->
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "2"}),
             caplog.at_level(logging.WARNING, logger="mdio.ingestion.segy.file_headers"),
         ):
-            result = _add_segy_file_headers(ds, _segy_info(_malformed_header()))
+            result = add_segy_file_headers(ds, _segy_info(_malformed_header()))
 
         stored = result["segy_file_header"].attrs["textHeader"]
         assert "\x00" not in stored
@@ -110,7 +110,7 @@ def test_mode_two_corrects_replacement_char(self, caplog: pytest.LogCaptureFixtu
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "2"}),
             caplog.at_level(logging.WARNING, logger="mdio.ingestion.segy.file_headers"),
         ):
-            result = _add_segy_file_headers(ds, _segy_info(_replacement_char_header()))
+            result = add_segy_file_headers(ds, _segy_info(_replacement_char_header()))
 
         stored = result["segy_file_header"].attrs["textHeader"]
         assert "\ufffd" not in stored
@@ -125,7 +125,7 @@ def test_mode_two_passes_through_well_formed(self, caplog: pytest.LogCaptureFixt
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "2"}),
             caplog.at_level(logging.WARNING, logger="mdio.ingestion.segy.file_headers"),
         ):
-            result = _add_segy_file_headers(ds, _segy_info(header))
+            result = add_segy_file_headers(ds, _segy_info(header))
 
         assert result["segy_file_header"].attrs["textHeader"] == header
         assert not any("Correcting" in record.message for record in caplog.records)
@@ -140,7 +140,7 @@ def test_mode_two_repairs_double_newline_wrapped(self, caplog: pytest.LogCapture
             patch.dict(os.environ, {"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER": "2"}),
             caplog.at_level(logging.WARNING, logger="mdio.ingestion.segy.file_headers"),
         ):
-            result = _add_segy_file_headers(ds, _segy_info(wrapped))
+            result = add_segy_file_headers(ds, _segy_info(wrapped))
 
         stored = result["segy_file_header"].attrs["textHeader"]
         stored_rows = stored.split("\n")

From 62c7f0a818b23f363a2a82c1701429b6154a4365 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Tue, 2 Jun 2026 18:39:16 +0000
Subject: [PATCH 02/11] chore: stop tracking refactor_plan_bak.md

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 refactor_plan_bak.md | 220 -------------------------------------------
 1 file changed, 220 deletions(-)
 delete mode 100644 refactor_plan_bak.md

diff --git a/refactor_plan_bak.md b/refactor_plan_bak.md
deleted file mode 100644
index ce33bde9..00000000
--- a/refactor_plan_bak.md
+++ /dev/null
@@ -1,220 +0,0 @@
----
-name: split-grid-overrides-v3
-overview: Split the current `grid_overrides_v3` branch into 8 dependency-ordered, atomically reviewable PRs against `main`, each with its own changelog story. Each PR is green on its own; PR 6 is the one that flips runtime over to the new orchestrator and is the natural minor-version-bump cut.
-todos:
-  - id: pr1
-    content: "PR 1: extract pure helpers (header_analysis, coordinate_utils, validation, metadata) into mdio/ingestion/ with no behavior change"
-    status: pending
-  - id: pr2
-    content: "PR 2: add Pydantic GridOverrides class + dict deprecation shim, additive only"
-    status: pending
-  - id: pr3
-    content: "PR 3: replace GridOverrider internals with IndexStrategy pattern, fix the broken test_segy_grid_overrides.py file"
-    status: pending
-  - id: pr4
-    content: "PR 4: introduce SchemaResolver + ResolvedSchema and template declare_coordinate_specs/apply_resolved_dimensions hooks (dead code, wired in PR 6)"
-    status: pending
-  - id: pr5
-    content: "PR 5: add HeaderAnalyzer and DatasetFactory services with unit tests (dead code, wired in PR 6)"
-    status: pending
-  - id: pr6
-    content: "PR 6: introduce run_segy_ingestion orchestrator, slim segy_to_mdio to a shim, expose new public API, delete legacy GridOverrider, decide CLI fate"
-    status: pending
-  - id: pr7
-    content: "PR 7: extract MDIO__IMPORT__RAW_HEADERS gating into _raw_headers_experimental.py for one-file removal"
-    status: pending
-  - id: pr8
-    content: "PR 8: dependency bump (uv.lock) and template internal polish"
-    status: pending
-isProject: false
----
-
-# Split `grid_overrides_v3` Into 8 Atomic PRs
-
-## Strategy
-
-The current branch interleaves five themes: typed config, strategy refactor, schema resolution, services, pipeline rewire, and an experimental flag move. Treat the existing branch as the **source of truth for the final state** and replay it as a stack of branches off `main`. Each PR below cherry-picks a tight subset of files; nothing earlier than PR 6 changes the runtime path of `segy_to_mdio`, so each one merges without a behavior change visible to users.
-
-Suggested workflow per PR:
-
-1. `git checkout -b <pr-name> origin/main`
-2. `git checkout grid_overrides_v3 -- <files for this PR>`
-3. Trim back anything that pulls in scope from later PRs (notes per PR below).
-4. Run `nox -s pre-commit` and `nox -s tests-3.13 -- <relevant tests>` per the workspace rule.
-5. Open the PR with the suggested title/summary.
-
-```mermaid
-flowchart LR
-    PR1[PR1 extract pure modules] --> PR2[PR2 Pydantic GridOverrides]
-    PR1 --> PR3[PR3 strategy pattern]
-    PR2 --> PR3
-    PR3 --> PR4[PR4 schema resolver and template hooks]
-    PR4 --> PR5[PR5 HeaderAnalyzer and DatasetFactory]
-    PR5 --> PR6[PR6 pipeline orchestrator and public API]
-    PR6 --> PR7[PR7 raw-headers gating]
-    PR6 --> PR8[PR8 dep bump and template polish]
-```
-
----
-
-## PR 1 - Extract pure modules into `mdio/ingestion/`
-
-**Goal:** mechanical code move, zero behavior change. Gives the next PRs a home without dragging architecture along.
-
-**Files (copy from branch verbatim):**
-- [src/mdio/ingestion/__init__.py](src/mdio/ingestion/__init__.py) - trimmed to only re-export what this PR introduces
-- [src/mdio/ingestion/header_analysis.py](src/mdio/ingestion/header_analysis.py) (the pure analysis primitives + `StreamerShotGeometryType`, `ShotGunGeometryType`, `analyze_streamer_headers`, `analyze_lines_for_guns`, `analyze_non_indexed_headers`)
-- [src/mdio/ingestion/coordinate_utils.py](src/mdio/ingestion/coordinate_utils.py)
-- [src/mdio/ingestion/validation.py](src/mdio/ingestion/validation.py) (`grid_density_qc`, `validate_spec_in_template`)
-- [src/mdio/ingestion/metadata.py](src/mdio/ingestion/metadata.py) MINUS the `attach_raw_binary_header` import (defer to PR 7)
-
-**Adjustments before commit:**
-- In [src/mdio/ingestion/metadata.py](src/mdio/ingestion/metadata.py), drop `from mdio.ingestion._raw_headers_experimental import attach_raw_binary_header` and its call inside `add_segy_file_headers`. Inline the existing on-`main` raw-binary handling instead, or stub it as a TODO (PR 7 reintroduces the gating module).
-- Trim `__init__.py` exports to only the symbols actually defined in this PR.
-- Update import sites in `mdio/converters/segy.py` and any other on-`main` callers to point at the new module locations.
-
-**Tests:** existing tests must pass unchanged (`nox -s tests-3.13`).
-
-**PR title:** `refactor(ingestion): extract analysis, coordinate, validation, and metadata helpers`
-
----
-
-## PR 2 - Add Pydantic `GridOverrides` (additive)
-
-**Goal:** the typed model lands as a public class, dict path stays alive internally.
-
-**Files:**
-- [src/mdio/segy/geometry.py](src/mdio/segy/geometry.py) - add the `GridOverrides` class **alongside** the existing `GridOverrider` (do not delete `GridOverrider` yet).
-- [src/mdio/converters/segy.py](src/mdio/converters/segy.py) - add the `isinstance(grid_overrides, dict)` shim with `DeprecationWarning` and `GridOverrides.from_legacy_dict(...)` coercion. After coercion, immediately re-dump to dict via `model_dump(by_alias=True, exclude_defaults=True)` so the still-on-`main` `GridOverrider` keeps receiving the dict it expects.
-- [src/mdio/__init__.py](src/mdio/__init__.py) - add `GridOverrides` to `__all__` and imports.
-- [tests/unit/test_grid_overrides_pydantic.py](tests/unit/test_grid_overrides_pydantic.py) - new file, copy as-is.
-
-**Acceptance:** existing `tests/unit/test_segy_grid_overrides.py` (which imports `GridOverrider`) still passes; `test_grid_overrides_pydantic.py` passes.
-
-**PR title:** `feat(api): typed Pydantic GridOverrides with legacy dict deprecation`
-
-**Changelog line:** "`mdio.GridOverrides` is now the supported way to configure grid overrides. Passing a `dict` still works but emits a `DeprecationWarning`."
-
----
-
-## PR 3 - Replace `GridOverrider` with strategy pattern
-
-**Goal:** the "modernization to grid override code" PR you called out. Behavior preserved; the monolithic `GridOverrider.run(...)` becomes `IndexStrategyRegistry().create_strategy(...)` returning composable `IndexStrategy` instances.
-
-**Files:**
-- [src/mdio/ingestion/index_strategies.py](src/mdio/ingestion/index_strategies.py) - new (`IndexStrategy` ABC + `RegularGridStrategy`, `NonBinnedStrategy`, `DuplicateHandlingStrategy`, `ChannelWrappingStrategy`, `ShotWrappingStrategy`, `ComponentSynthesisStrategy`, `CompositeStrategy`, `IndexStrategyRegistry`).
-- [src/mdio/segy/geometry.py](src/mdio/segy/geometry.py) - rewrite `GridOverrider.run(...)` as a thin shim: build a `GridOverrides` from the input dict, call `IndexStrategyRegistry`, return the `(headers, names, chunks)` tuple shape that callers expect. Keep the public signature unchanged.
-- [tests/unit/test_ingestion_index_strategies.py](tests/unit/test_ingestion_index_strategies.py) - new.
-- [tests/unit/test_segy_grid_overrides.py](tests/unit/test_segy_grid_overrides.py) - **fix**: the file currently imports `GridOverrider` and `TemplateRegistry`; either keep the shim alive (above) so this file passes unchanged, or rewrite each test to drive `IndexStrategyRegistry` directly. The branch leaves it in a broken-import state, which is the kind of footgun this PR should clean up.
-
-**Acceptance:** integration tests `tests/integration/test_import_obn_grid_overrides.py` and `tests/integration/test_import_streamer_grid_overrides.py` pass; new strategy unit tests pass; old `test_segy_grid_overrides.py` either passes or is replaced.
-
-**PR title:** `refactor(ingestion): split GridOverrider into composable IndexStrategy classes`
-
----
-
-## PR 4 - Schema resolution + template-aware dimension layout
-
-**Goal:** declarative final-schema description; templates stop having ingestion poke private attributes.
-
-**Files:**
-- [src/mdio/ingestion/schema_resolver.py](src/mdio/ingestion/schema_resolver.py) - new (`DimensionSpec`, `CoordinateSpec`, `ResolvedSchema`, `SchemaResolver`).
-- [src/mdio/builder/templates/base.py](src/mdio/builder/templates/base.py) - add `declare_coordinate_specs()` (default impl over `physical_coordinate_names` + `logical_coordinate_names`) and `apply_resolved_dimensions(dim_names, chunk_shape)`. Add the `synthesize_missing_dims` and `_calculated_dims` attributes.
-- Per-template overrides only where the default isn't right:
-  - [src/mdio/builder/templates/seismic_3d_obn.py](src/mdio/builder/templates/seismic_3d_obn.py) - per-shot-dim coord specs, `_calculated_dims = ("shot_index",)`, `synthesize_missing_dims = ("component",)`.
-  - [src/mdio/builder/templates/seismic_3d_streamer_field.py](src/mdio/builder/templates/seismic_3d_streamer_field.py)
-  - [src/mdio/builder/templates/seismic_3d_coca.py](src/mdio/builder/templates/seismic_3d_coca.py)
-  - [src/mdio/builder/templates/seismic_3d_cdp.py](src/mdio/builder/templates/seismic_3d_cdp.py), [src/mdio/builder/templates/seismic_2d_cdp.py](src/mdio/builder/templates/seismic_2d_cdp.py) - inline/crossline-indexed `cdp_x`/`cdp_y` specs.
-- [tests/unit/test_ingestion_schema_resolver.py](tests/unit/test_ingestion_schema_resolver.py) - new.
-
-**Adjustments:** nothing in this PR consumes `ResolvedSchema` at runtime - the resolver is dead code until PR 5/6 wire it. That is intentional.
-
-**Acceptance:** all template tests under `tests/unit/v1/templates/` still pass; new resolver tests pass.
-
-**PR title:** `feat(ingestion): declarative ResolvedSchema and template-driven coordinate specs`
-
----
-
-## PR 5 - `HeaderAnalyzer` + `DatasetFactory`
-
-**Goal:** add the two small services that PR 6 will compose. They have a real changelog bullet on their own ("ingestion now reads only the headers required by the resolved schema").
-
-**Files:**
-- [src/mdio/ingestion/header_analyzer.py](src/mdio/ingestion/header_analyzer.py)
-- [src/mdio/ingestion/dataset_factory.py](src/mdio/ingestion/dataset_factory.py)
-- [tests/unit/test_ingestion_header_analyzer.py](tests/unit/test_ingestion_header_analyzer.py)
-- [tests/unit/test_ingestion_dataset_factory.py](tests/unit/test_ingestion_dataset_factory.py)
-
-**Adjustments:** still no runtime use; pipeline switch is PR 6.
-
-**Acceptance:** new unit tests pass; everything else unchanged.
-
-**PR title:** `feat(ingestion): HeaderAnalyzer and DatasetFactory services`
-
----
-
-## PR 6 - Pipeline orchestrator + public API surface (the minor-version-bump PR)
-
-**Goal:** wire PRs 2-5 into a single `run_segy_ingestion`, replace the body of `segy_to_mdio` with a thin shim, surface the new public types.
-
-**Files:**
-- [src/mdio/ingestion/pipeline.py](src/mdio/ingestion/pipeline.py) - new (`run_segy_ingestion`).
-- [src/mdio/converters/segy.py](src/mdio/converters/segy.py) - body becomes the dict-deprecation shim + delegate to `run_segy_ingestion`. The `noqa: PLR0913` and old-style internals go away.
-- [src/mdio/ingestion/__init__.py](src/mdio/ingestion/__init__.py) - expand to the full `__all__` from the branch.
-- [src/mdio/__init__.py](src/mdio/__init__.py) - add `run_segy_ingestion`, `IndexStrategy`, `IndexStrategyRegistry`, `ResolvedSchema`, `CoordinateSpec`, `DimensionSpec`.
-- [src/mdio/segy/geometry.py](src/mdio/segy/geometry.py) - **delete** the legacy `GridOverrider` shim that PR 3 kept alive. All callers now route through `run_segy_ingestion`.
-- [src/mdio/commands/segy.py](src/mdio/commands/segy.py) - **fix the broken CLI**: it currently calls `segy_to_mdio(segy_path=..., index_bytes=..., index_names=..., chunksize=...)`, none of which match the v1 signature in `converters/segy.py`. Either update the CLI to construct a `SegySpec` + `mdio_template` + `GridOverrides` and call `segy_to_mdio` correctly, or wrap it in a `# TODO(v1.3): rewrite CLI for v1 API` and skip it from CI. Pick one explicitly; do not leave it silently broken.
-
-**Acceptance:** full `nox -s tests-3.13` passes; integration tests under `tests/integration/` pass; example notebooks (if any) still run.
-
-**PR title:** `feat(ingestion): run_segy_ingestion orchestrator and v1.2 public API`
-
-**Changelog highlights:**
-- New public functions/classes: `run_segy_ingestion`, `IndexStrategy`, `IndexStrategyRegistry`, `ResolvedSchema`, `CoordinateSpec`, `DimensionSpec`.
-- `segy_to_mdio` retained as a v1.x compatibility entry point.
-- Memory: ingestion now only parses headers required by the resolved schema (`HeaderAnalyzer`).
-
----
-
-## PR 7 - Gate experimental raw-headers behind `_raw_headers_experimental.py`
-
-**Goal:** isolate the `MDIO__IMPORT__RAW_HEADERS` feature so removing it later is a one-file delete.
-
-**Files:**
-- [src/mdio/ingestion/_raw_headers_experimental.py](src/mdio/ingestion/_raw_headers_experimental.py) - new. The module's own docstring is the PR description; copy it into the PR body.
-- [src/mdio/ingestion/pipeline.py](src/mdio/ingestion/pipeline.py) - call `maybe_add_raw_headers(mdio_template, mdio_ds)` after `DatasetFactory().build(...)`.
-- [src/mdio/ingestion/metadata.py](src/mdio/ingestion/metadata.py) - re-add the `attach_raw_binary_header` import + call inside `add_segy_file_headers` (the inverse of the trim in PR 1).
-- [src/mdio/builder/templates/base.py](src/mdio/builder/templates/base.py) - keep the `_add_raw_headers` method (defined in PR 4) in place; no change here unless the prior PR omitted it.
-
-**Acceptance:** with `MDIO__IMPORT__RAW_HEADERS=1` and Zarr v3, raw_headers variable appears in output; without the env var, no behavior change.
-
-**PR title:** `refactor: gate experimental raw-headers feature behind a single module`
-
----
-
-## PR 8 - Dep bump + template polish
-
-**Goal:** the leftover small stuff. Optional; can ride along with PR 6 if reviewers prefer.
-
-**Files:**
-- [uv.lock](uv.lock)
-- Per-template polish in [src/mdio/builder/templates/](src/mdio/builder/templates/) (`_dim_names`/`_calculated_dims` cleanup, dead-method removal).
-
-**Acceptance:** full test suite green; `nox -s pre-commit` clean.
-
-**PR title:** `chore(deps): bump dependencies and polish template internals`
-
----
-
-## Risks & coupling notes
-
-- **PR 4-6 are the tightest cluster.** PR 4's `ResolvedSchema` is dead code at merge time, PR 5's services are dead code at merge time, PR 6 flips runtime. This staging is intentional - it keeps each diff small and reviewable, at the cost of two PRs that "do nothing yet". If reviewers push back on dead code, fold PRs 4 and 5 together but **never** fold them into PR 6.
-- **PR 3 must keep `GridOverrider` callable** until PR 6 deletes it, otherwise existing callers in `mdio/converters/segy.py` (still on the v1.1 path) break mid-stack.
-- **`tests/unit/test_segy_grid_overrides.py`** is currently broken on the branch (imports a `GridOverrider` symbol that no longer exists in `mdio.segy.geometry`). PR 3 is the place to fix it. Do not let it land broken in a separate PR.
-- **CLI in `mdio/commands/segy.py`** has the same problem (calls v0-style `segy_to_mdio(segy_path=..., index_bytes=..., ...)`). PR 6 must either fix or explicitly defer with a tracked issue - this would be very embarrassing in a release notes diff.
-- **Autodocs.** The split gives ~8 changelog bullets instead of one "big refactor" line. PR 2 (typed config), PR 3 (strategy pattern), PR 5 (memory win), and PR 6 (new public API) are all release-note-worthy on their own.
-
-## Suggested merge order for the next two minor releases
-
-- v1.2: PRs 1, 2, 7 (no runtime change, deprecation only, easy to revert).
-- v1.3: PRs 3, 4, 5, 6, 8 (the actual architectural turn, with the orchestrator landing in PR 6 as the headline).

From 3d60cc441ac17df8e2eced73cad981a520045dd8 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Tue, 2 Jun 2026 18:51:02 +0000
Subject: [PATCH 03/11] Cleanup docstrings

---
 src/mdio/builder/templates/base.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/src/mdio/builder/templates/base.py b/src/mdio/builder/templates/base.py
index 928c5926..e6214895 100644
--- a/src/mdio/builder/templates/base.py
+++ b/src/mdio/builder/templates/base.py
@@ -115,11 +115,7 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
         return tuple(specs)
 
     def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template.
-
-        This is the single source of truth for dimension-coordinate data types: both the
-        ingestion ``SchemaResolver`` and :meth:`_add_dimension_coordinate` read from it, so
-        the resolved schema and the built dataset cannot disagree.
+        """Declare data types for each dimension in this template.
 
         Returns:
             A dictionary mapping dimension name to ScalarType.
@@ -129,23 +125,19 @@ def declare_dimension_specs(self) -> dict[str, ScalarType]:
     def _dim_dtype(self, name: str) -> ScalarType:
         """Return the declared dtype for a dimension coordinate.
 
-        Sourcing dimension-coordinate dtypes here (and in the ingestion ``SchemaResolver``)
-        from :meth:`declare_dimension_specs` keeps the built dataset and the resolved schema
-        from disagreeing without a separate post-build assertion.
-
         Args:
             name: The dimension name.
 
         Returns:
-            The declared :class:`ScalarType`, defaulting to ``INT32`` when undeclared.
+            The declared ScalarType, defaulting to INT32.
         """
         return self.declare_dimension_specs().get(name, ScalarType.INT32)
 
     def _add_dimension_coordinate(self, name: str) -> None:
-        """Add a single dimension coordinate, sourcing its dtype from :meth:`declare_dimension_specs`.
+        """Add a single dimension coordinate.
 
         Args:
-            name: The dimension name; also the coordinate name and its sole dimension.
+            name: The dimension name.
         """
         self._builder.add_coordinate(
             name,
@@ -444,7 +436,7 @@ def _add_coordinates(self) -> None:
                     raise
 
     def _add_trace_mask(self) -> None:
-        """Add trace mask variables."""
+        """Add trace mask variable."""
         self._builder.add_variable(
             name="trace_mask",
             dimensions=self.spatial_dimension_names,
@@ -454,7 +446,7 @@ def _add_trace_mask(self) -> None:
         )
 
     def _add_trace_headers(self, header_dtype: StructuredType) -> None:
-        """Add trace mask variables."""
+        """Add trace headers variable."""
         chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self.full_chunk_shape[:-1]))
         self._builder.add_variable(
             name="headers",

From abbaaabeb1606b7f473ff28c323ca948a3c7eeba Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Tue, 2 Jun 2026 18:53:20 +0000
Subject: [PATCH 04/11] Remove deep copy requirement

---
 src/mdio/builder/templates/base.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/mdio/builder/templates/base.py b/src/mdio/builder/templates/base.py
index e6214895..c6e001dd 100644
--- a/src/mdio/builder/templates/base.py
+++ b/src/mdio/builder/templates/base.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import copy
 from abc import ABC
 from abc import abstractmethod
 from typing import TYPE_CHECKING
@@ -298,32 +297,32 @@ def trace_domain(self) -> str:
     @property
     def spatial_dimension_names(self) -> tuple[str, ...]:
         """Returns the names of the dimensions excluding the last axis."""
-        return copy.deepcopy(self._dim_names[:-1])
+        return self._dim_names[:-1]
 
     @property
     def dimension_names(self) -> tuple[str, ...]:
         """Returns the names of the dimensions."""
-        return copy.deepcopy(self._dim_names)
+        return self._dim_names
 
     @property
     def calculated_dimension_names(self) -> tuple[str, ...]:
-        """Returns the names of the dimensions."""
-        return copy.deepcopy(self._calculated_dims)
+        """Returns the names of the calculated dimensions."""
+        return self._calculated_dims
 
     @property
     def physical_coordinate_names(self) -> tuple[str, ...]:
         """Returns the names of the physical (world) coordinates."""
-        return copy.deepcopy(self._physical_coord_names)
+        return self._physical_coord_names
 
     @property
     def logical_coordinate_names(self) -> tuple[str, ...]:
         """Returns the names of the logical (grid) coordinates."""
-        return copy.deepcopy(self._logical_coord_names)
+        return self._logical_coord_names
 
     @property
     def coordinate_names(self) -> tuple[str, ...]:
         """Returns names of all coordinates."""
-        return copy.deepcopy(self._physical_coord_names + self._logical_coord_names)
+        return self._physical_coord_names + self._logical_coord_names
 
     @property
     def full_chunk_shape(self) -> tuple[int, ...]:

From cfb3380cc01cdd36a4ea3cd1c930097e9ff12d58 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Tue, 2 Jun 2026 19:13:14 +0000
Subject: [PATCH 05/11] Make method name more intentional and clear

---
 src/mdio/builder/templates/base.py                         | 7 ++++---
 src/mdio/builder/templates/seismic_3d_coca.py              | 5 +++--
 src/mdio/builder/templates/seismic_3d_obn.py               | 5 +++--
 src/mdio/builder/templates/seismic_3d_offset_tiles.py      | 5 +++--
 src/mdio/builder/templates/seismic_3d_receiver_gathers.py  | 5 +++--
 .../builder/templates/seismic_3d_shot_receiver_line.py     | 5 +++--
 src/mdio/builder/templates/seismic_3d_streamer_field.py    | 5 +++--
 src/mdio/builder/templates/types.py                        | 2 ++
 src/mdio/ingestion/schema/resolver.py                      | 2 +-
 9 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/mdio/builder/templates/base.py b/src/mdio/builder/templates/base.py
index c6e001dd..ed23668b 100644
--- a/src/mdio/builder/templates/base.py
+++ b/src/mdio/builder/templates/base.py
@@ -18,6 +18,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.schemas.v1.variable import VariableMetadata
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 
 if TYPE_CHECKING:
     from mdio.builder.schemas.v1.dataset import Dataset
@@ -113,8 +114,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
         )
         return tuple(specs)
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare data types for each dimension in this template.
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare data types for each dimension coordinate in this template.
 
         Returns:
             A dictionary mapping dimension name to ScalarType.
@@ -130,7 +131,7 @@ def _dim_dtype(self, name: str) -> ScalarType:
         Returns:
             The declared ScalarType, defaulting to INT32.
         """
-        return self.declare_dimension_specs().get(name, ScalarType.INT32)
+        return self.declare_dim_coordinate_types().get(name, ScalarType.INT32)
 
     def _add_dimension_coordinate(self, name: str) -> None:
         """Add a single dimension coordinate.
diff --git a/src/mdio/builder/templates/seismic_3d_coca.py b/src/mdio/builder/templates/seismic_3d_coca.py
index d83d4fd6..5974e099 100644
--- a/src/mdio/builder/templates/seismic_3d_coca.py
+++ b/src/mdio/builder/templates/seismic_3d_coca.py
@@ -7,6 +7,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 from mdio.builder.templates.types import SeismicDataDomain
 
 
@@ -34,8 +35,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="cdp_y", dimensions=("inline", "crossline"), dtype=ScalarType.FLOAT64),
         )
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template."""
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare the data types for each dimension coordinate in this template."""
         return {
             "inline": ScalarType.INT32,
             "crossline": ScalarType.INT32,
diff --git a/src/mdio/builder/templates/seismic_3d_obn.py b/src/mdio/builder/templates/seismic_3d_obn.py
index 503f6022..de58ae53 100644
--- a/src/mdio/builder/templates/seismic_3d_obn.py
+++ b/src/mdio/builder/templates/seismic_3d_obn.py
@@ -6,6 +6,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 from mdio.builder.templates.types import SeismicDataDomain
 
 
@@ -64,8 +65,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="source_coord_y", dimensions=shot_dims, dtype=ScalarType.FLOAT64),
         )
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template."""
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare the data types for each dimension coordinate in this template."""
         return {
             "component": ScalarType.UINT8,
             "receiver": ScalarType.UINT32,
diff --git a/src/mdio/builder/templates/seismic_3d_offset_tiles.py b/src/mdio/builder/templates/seismic_3d_offset_tiles.py
index 5eb5df54..8d0ea79d 100644
--- a/src/mdio/builder/templates/seismic_3d_offset_tiles.py
+++ b/src/mdio/builder/templates/seismic_3d_offset_tiles.py
@@ -7,6 +7,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 from mdio.builder.templates.types import SeismicDataDomain
 
 
@@ -41,8 +42,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="cdp_y", dimensions=("inline", "crossline"), dtype=ScalarType.FLOAT64),
         )
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template."""
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare the data types for each dimension coordinate in this template."""
         return {
             "inline": ScalarType.INT32,
             "crossline": ScalarType.INT32,
diff --git a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py
index 911ecf87..a820595d 100644
--- a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py
+++ b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py
@@ -7,6 +7,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 
 
 class Seismic3DReceiverGathersTemplate(AbstractDatasetTemplate):
@@ -45,8 +46,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="source_coord_y", dimensions=shot_dims, dtype=ScalarType.FLOAT64),
         )
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template."""
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare the data types for each dimension coordinate in this template."""
         return {
             "receiver": ScalarType.UINT32,
             "shot_line": ScalarType.UINT32,
diff --git a/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py b/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
index 311ca046..5f2c1ad7 100644
--- a/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
+++ b/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
@@ -6,6 +6,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 from mdio.builder.templates.types import SeismicDataDomain
 
 
@@ -45,8 +46,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="orig_field_record_num", dimensions=source_dims, dtype=ScalarType.UINT32),
         )
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template."""
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare the data types for each dimension coordinate in this template."""
         return {
             "shot_line": ScalarType.UINT32,
             "shot_point": ScalarType.UINT32,
diff --git a/src/mdio/builder/templates/seismic_3d_streamer_field.py b/src/mdio/builder/templates/seismic_3d_streamer_field.py
index d8e23368..b7cf1c62 100644
--- a/src/mdio/builder/templates/seismic_3d_streamer_field.py
+++ b/src/mdio/builder/templates/seismic_3d_streamer_field.py
@@ -6,6 +6,7 @@
 from mdio.builder.schemas.v1.variable import CoordinateMetadata
 from mdio.builder.templates.base import AbstractDatasetTemplate
 from mdio.builder.templates.types import CoordinateSpec
+from mdio.builder.templates.types import DimCoordinateTypes
 from mdio.builder.templates.types import SeismicDataDomain
 
 
@@ -52,8 +53,8 @@ def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
             CoordinateSpec(name="group_coord_y", dimensions=receiver_dims, dtype=ScalarType.FLOAT64),
         )
 
-    def declare_dimension_specs(self) -> dict[str, ScalarType]:
-        """Declare the data types for each dimension in this template."""
+    def declare_dim_coordinate_types(self) -> DimCoordinateTypes:
+        """Declare the data types for each dimension coordinate in this template."""
         return {
             "sail_line": ScalarType.UINT32,
             "gun": ScalarType.UINT8,
diff --git a/src/mdio/builder/templates/types.py b/src/mdio/builder/templates/types.py
index 70d9de57..f333be52 100644
--- a/src/mdio/builder/templates/types.py
+++ b/src/mdio/builder/templates/types.py
@@ -11,6 +11,8 @@
 
 CdpGatherDomain: TypeAlias = Literal["offset", "angle"]
 
+DimCoordinateTypes: TypeAlias = dict[str, ScalarType]
+
 
 class CoordinateSpec(BaseModel):
     """Specification for a non-dimension coordinate declared by a template.
diff --git a/src/mdio/ingestion/schema/resolver.py b/src/mdio/ingestion/schema/resolver.py
index 8dffd12e..45053c2a 100644
--- a/src/mdio/ingestion/schema/resolver.py
+++ b/src/mdio/ingestion/schema/resolver.py
@@ -59,7 +59,7 @@ def resolve(
     def _template_to_schema(self, template: AbstractDatasetTemplate) -> ResolvedSchema:
         """Convert a template to a resolved schema without overrides."""
         calculated = set(template.calculated_dimension_names)
-        dim_dtypes = template.declare_dimension_specs()
+        dim_dtypes = template.declare_dim_coordinate_types()
         dimensions = [
             DimensionSpec(
                 name=name,

From daad52ee808a348f64e4af3da0a4ff2598d1536e Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Thu, 4 Jun 2026 13:16:53 +0000
Subject: [PATCH 06/11] Deslop and cleanup comments

---
 src/mdio/ingestion/segy/index_strategies.py | 151 +++++++++-----------
 1 file changed, 69 insertions(+), 82 deletions(-)

diff --git a/src/mdio/ingestion/segy/index_strategies.py b/src/mdio/ingestion/segy/index_strategies.py
index 6e80b29f..8e533f3e 100644
--- a/src/mdio/ingestion/segy/index_strategies.py
+++ b/src/mdio/ingestion/segy/index_strategies.py
@@ -1,13 +1,11 @@
 """Composable index strategies for transforming SEG-Y headers into indexable dimensions.
 
-This module replaces the monolithic :class:`mdio.segy.geometry.GridOverrider` command
-dispatch with a small set of single-responsibility :class:`IndexStrategy` objects that can
-be composed via :class:`CompositeStrategy`.
-
-Strategies are selected by :class:`IndexStrategyRegistry` from the typed
-:class:`mdio.segy.geometry.GridOverrides` configuration plus optional template hints. The
-public contract preserved by :class:`mdio.segy.geometry.GridOverrider` (a thin shim around
-this module) keeps end-to-end ingestion behavior identical to v1.1.x.
+This module replaces the monolithic `GridOverrider` command dispatch with a small set of
+single-responsibility `IndexStrategy` objects that can be composed via `CompositeStrategy`.
+
+Strategies are selected by `IndexStrategyRegistry` from the typed `GridOverrides`
+configuration plus optional template hints. The public contract preserved by `GridOverrider`
+(a thin shim around this module) keeps end-to-end ingestion behavior identical.
 """
 
 from __future__ import annotations
@@ -46,31 +44,31 @@
 class IndexStrategy(ABC):
     """Abstract base for header indexing strategies.
 
-    A strategy transforms a raw header array (e.g. add or rebase fields) and computes
-    the resulting :class:`Dimension` list. Strategies are composable through
-    :class:`CompositeStrategy`. The default :meth:`compute_dimensions` builds dimensions
-    from unique header values; subclasses override only when they need different
-    semantics (currently just :class:`CompositeStrategy`).
+    A strategy transforms a raw header array (e.g., adding or rebasing fields) and
+    computes the resulting `Dimension` list. Strategies are composable through
+    `CompositeStrategy`. The default `compute_dimensions` builds dimensions from unique
+    header values; subclasses override only when they need different semantics
+    (currently just `CompositeStrategy`).
 
-    Subclasses with header preconditions set :attr:`required_keys` so the shim and
-    :class:`CompositeStrategy` can raise :class:`GridOverrideKeysError` with a clear
-    "missing fields X, Y, Z" message before numpy fails on a deeper key lookup.
+    Subclasses with header preconditions set `required_keys` so the shim and
+    `CompositeStrategy` can raise `GridOverrideKeysError` with a clear
+    "missing fields" message before NumPy fails on a deeper key lookup.
     """
 
     @property
     def required_keys(self) -> frozenset[str]:
-        """Header field names that must be present before :meth:`transform_headers` runs.
+        """Header field names that must be present before `transform_headers` runs.
 
         Empty by default. Override on subclasses whose transform indexes specific fields.
         """
         return frozenset()
 
     def validate_headers(self, headers: HeaderArray) -> None:
-        """Raise :class:`GridOverrideKeysError` if any required header field is missing.
+        """Raise `GridOverrideKeysError` if any required header field is missing.
 
-        Callers (the :class:`mdio.segy.geometry.GridOverrider` shim and
-        :class:`CompositeStrategy`) invoke this before each transform so failure points
-        at the user-facing override name rather than at a numpy structured-array key error.
+        Callers (the `GridOverrider` shim and `CompositeStrategy`) invoke this before each
+        transform so failure points at the user-facing override name rather than at a NumPy
+        structured-array key error.
         """
         required = self.required_keys
         if not required:
@@ -84,10 +82,9 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
         """Return a new header array with this strategy's transformation applied."""
 
     def compute_dimensions(self, headers: HeaderArray, dim_names: tuple[str, ...]) -> list[Dimension]:
-        """Build one :class:`Dimension` per requested name from unique header values.
+        """Build one `Dimension` per requested name from unique header values.
 
-        Names absent from ``headers.dtype.names`` are silently skipped, matching the v1.1
-        ``GridOverrider`` post-processing step.
+        Names absent from `headers.dtype.names` are silently skipped.
         """
         return [
             Dimension(coords=np.unique(headers[name]), name=name) for name in dim_names if name in headers.dtype.names
@@ -108,21 +105,20 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
 
 
 class DuplicateHandlingStrategy(IndexStrategy):
-    """Disambiguate duplicate index tuples by appending a per-tuple ``trace`` counter.
+    """Disambiguate duplicate index tuples by appending a per-tuple `trace` counter.
 
-    Mirrors the v1.1 ``DuplicateIndex`` command: count occurrences of each unique
-    combination of dimension fields (excluding coordinate fields and any caller-declared
-    ``excluded_fields``), then attach the resulting 1-based counter as a new ``trace`` field
-    on the original headers.
+    Counts occurrences of each unique combination of dimension fields (excluding
+    coordinate fields and any caller-declared `excluded_fields`), then attaches the
+    resulting 1-based counter as a new `trace` field on the original headers.
 
     Args:
         coord_fields: Names of header fields that are template coordinates and must be
             excluded from the dimension grouping (their values vary independently of the
             grid index).
         excluded_fields: Additional fields to exclude from grouping. Used by
-            :class:`NonBinnedStrategy` to keep the explicit ``non_binned_dims`` from
+            `NonBinnedStrategy` to keep the explicit `non_binned_dims` from
             polluting the per-tuple counter.
-        dtype: NumPy dtype for the appended ``trace`` counter.
+        dtype: NumPy dtype for the appended `trace` counter.
     """
 
     def __init__(
@@ -144,7 +140,7 @@ def _dim_fields(self, headers: HeaderArray) -> list[str]:
         ]
 
     def transform_headers(self, headers: HeaderArray) -> HeaderArray:
-        """Append a per-(dim-tuple) ``trace`` counter to ``headers``."""
+        """Append a per-dimension-tuple `trace` counter to headers."""
         dim_fields = self._dim_fields(headers)
         dim_headers = headers[dim_fields] if dim_fields else headers
         with_trace = analyze_non_indexed_headers(dim_headers, dtype=self.dtype)
@@ -157,19 +153,18 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
 
 
 class NonBinnedStrategy(DuplicateHandlingStrategy):
-    """Collapse selected non-binned dimensions into a single ``trace`` dimension.
+    """Collapse selected non-binned dimensions into a single `trace` dimension.
 
-    Inherits the per-tuple ``trace`` counter from :class:`DuplicateHandlingStrategy` and
-    captures ``chunksize`` so the :class:`mdio.segy.geometry.GridOverrider` shim can size
-    the new ``trace`` chunk correctly.
+    Inherits the per-tuple `trace` counter from `DuplicateHandlingStrategy` and
+    captures `chunksize` so the `GridOverrider` shim can size the new `trace` chunk correctly.
 
     Args:
-        chunksize: Chunk size to assign to the ``trace`` dimension. The strategy itself
+        chunksize: Chunk size to assign to the `trace` dimension. The strategy itself
             does not apply this value; the shim uses it when rewriting the chunksize tuple.
-        non_binned_dims: Header fields collapsed into ``trace``. They are excluded from
+        non_binned_dims: Header fields collapsed into `trace`. They are excluded from
             the duplicate grouping so the counter only varies along the remaining dims.
         coord_fields: Template coordinate names to exclude from grouping.
-        dtype: NumPy dtype for the appended ``trace`` counter.
+        dtype: NumPy dtype for the appended `trace` counter.
     """
 
     def __init__(
@@ -193,8 +188,7 @@ class ChannelWrappingStrategy(IndexStrategy):
     """Renumber streamer channels per cable when geometry is Type B.
 
     Detects whether channel numbering is per-cable (Type A; pass-through) or sequential
-    across cables (Type B; rebase to 1..N per cable). Mirrors the v1.1 ``AutoChannelWrap``
-    command.
+    across cables (Type B; rebase to 1..N per cable).
     """
 
     @property
@@ -203,7 +197,7 @@ def required_keys(self) -> frozenset[str]:
         return frozenset({"shot_point", "cable", "channel"})
 
     def transform_headers(self, headers: HeaderArray) -> HeaderArray:
-        """Rebase ``channel`` per cable for Type B geometry; pass through for Type A."""
+        """Rebase `channel` per cable for Type B geometry; pass through for Type A."""
         unique_cables, cable_chan_min, cable_chan_max, geom_type = analyze_streamer_headers(headers)
 
         logger.info("Ingesting dataset as %s", geom_type.name)
@@ -221,20 +215,19 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
 
 
 class ShotWrappingStrategy(IndexStrategy):
-    """Derive a dense ``shot_index`` field from sparse or interleaved ``shot_point`` values.
+    """Derive a dense `shot_index` field from sparse or interleaved `shot_point` values.
 
-    Replaces the v1.1 ``AutoShotWrap`` (streamer) and ``CalculateShotIndex`` (OBN)
-    commands. The two callers differ only in:
+    The two configurations differ in:
 
-    * ``line_field`` -- ``sail_line`` for streamer, ``shot_line`` for OBN.
-    * ``always_calculate`` -- streamer skips the transform entirely for Type A geometries
-      (per-gun shot points are already dense), OBN always emits ``shot_index`` because the
+    * `line_field` -- `sail_line` for streamer, `shot_line` for OBN.
+    * `always_calculate` -- streamer skips the transform entirely for Type A geometries
+      (per-gun shot points are already dense), OBN always emits `shot_index` because the
       template declares it as a calculated dimension.
 
     Args:
         line_field: Header field used to group shots into independent lines.
-        always_calculate: When ``True``, emit ``shot_index`` for every geometry type. For
-            Type A this builds a 0-based ``np.searchsorted`` over sorted unique shot
+        always_calculate: When `True`, emit `shot_index` for every geometry type. For
+            Type A this builds a 0-based `np.searchsorted` over sorted unique shot
             points per line.
     """
 
@@ -246,18 +239,14 @@ def __init__(self, line_field: str, always_calculate: bool = False) -> None:
 
     @property
     def required_keys(self) -> frozenset[str]:
-        """Streamer (``sail_line``) needs cable+channel too; OBN (``shot_line``) does not.
-
-        Mirrors the v1.1 split between ``AutoShotWrap.required_keys`` and
-        ``CalculateShotIndex.required_keys``.
-        """
+        """Streamer (`sail_line`) needs cable and channel too; OBN (`shot_line`) does not."""
         base = {self.line_field, "gun", "shot_point"}
         if self.line_field == self._STREAMER_LINE_FIELD:
             base |= {"cable", "channel"}
         return frozenset(base)
 
     def transform_headers(self, headers: HeaderArray) -> HeaderArray:
-        """Append ``shot_index`` derived from ``shot_point`` per line."""
+        """Append `shot_index` derived from `shot_point` per line."""
         unique_lines, unique_guns_per_line, geom_type = analyze_lines_for_guns(headers, line_field=self.line_field)
 
         logger.info("Ingesting dataset as shot type: %s (line_field=%s)", geom_type.name, self.line_field)
@@ -294,9 +283,8 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
 class ComponentSynthesisStrategy(IndexStrategy):
     """Synthesize template-required dimension fields that are absent from the headers.
 
-    Currently used to fill the ``component`` dimension with a constant value of 1 for
-    OBN templates whose SEG-Y spec does not include a component header. Mirrors the
-    v1.1 ``GridOverrider._synthesize_obn_component`` behavior.
+    Currently used to fill the `component` dimension with a constant value of 1 for
+    OBN templates whose SEG-Y spec does not include a component header.
 
     Args:
         synthesize_dims: Names of dimension fields to synthesize when missing.
@@ -338,8 +326,8 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
         """Validate then run each child strategy's transform in sequence.
 
         Each step re-validates against the running header array, so a strategy that
-        produces a field (e.g. :class:`ComponentSynthesisStrategy` adding ``component``)
-        can satisfy a later strategy's :attr:`required_keys`.
+        produces a field (e.g. `ComponentSynthesisStrategy` adding `component`)
+        can satisfy a later strategy's `required_keys`.
         """
         result = headers
         for strategy in self.strategies:
@@ -354,26 +342,25 @@ def compute_dimensions(self, headers: HeaderArray, dim_names: tuple[str, ...]) -
 
 
 class IndexStrategyRegistry:
-    """Picks the right :class:`IndexStrategy` from grid overrides + template hints.
+    """Picks the right `IndexStrategy` from grid overrides and template hints.
 
     The registry is the single source of truth for override semantics: it maps a
-    :class:`~mdio.segy.geometry.GridOverrides` both to the header-transforming
-    :class:`IndexStrategy` (:meth:`create_strategy`) and to the schema-reshaping
-    :class:`~mdio.ingestion.schema.models.SchemaEffect` (:meth:`schema_effect`). Keeping
-    both derivations here prevents the header view and the schema view from drifting.
+    `GridOverrides` both to the header-transforming `IndexStrategy` (`create_strategy`)
+    and to the schema-reshaping `SchemaEffect` (`schema_effect`). Keeping both derivations
+    here prevents the header view and the schema view from drifting.
     """
 
     def schema_effect(self, grid_overrides: GridOverrides | None) -> SchemaEffect | None:
-        """Return the schema reshaping implied by ``grid_overrides``, if any.
+        """Return the schema reshaping implied by `grid_overrides`, if any.
 
-        Only ``non_binned`` and ``has_duplicates`` change the dimension/coordinate layout;
+        Only `non_binned` and `has_duplicates` change the dimension/coordinate layout;
         every other override transforms headers in place without reshaping the schema.
 
         Args:
-            grid_overrides: Typed grid override configuration, or ``None``.
+            grid_overrides: Typed grid override configuration, or `None`.
 
         Returns:
-            The matching :class:`SchemaEffect`, or ``None`` when no layout change applies.
+            The matching `SchemaEffect`, or `None` when no layout change applies.
         """
         if not grid_overrides:
             return None
@@ -392,27 +379,27 @@ def create_strategy(
     ) -> IndexStrategy:
         """Build a strategy (possibly composite) for the given config.
 
-        Strategy ordering, when multiple flags are set, mirrors v1.1 behavior:
+        Strategy ordering, when multiple flags are set, mirrors previous behavior:
 
-        1. ``ComponentSynthesisStrategy`` (so later strategies can rely on the synthesized
+        1. `ComponentSynthesisStrategy` (so later strategies can rely on the synthesized
            field being present).
-        2. ``ChannelWrappingStrategy`` (rebases ``channel`` before any shot calculation).
-        3. ``ShotWrappingStrategy`` for ``auto_shot_wrap`` (streamer; ``sail_line``).
-        4. ``ShotWrappingStrategy`` for ``calculate_shot_index`` (OBN; ``shot_line``,
-           ``always_calculate=True``).
-        5. ``NonBinnedStrategy`` or ``DuplicateHandlingStrategy`` (mutually exclusive;
-           ``non_binned`` wins when both are set, matching v1.x semantics).
+        2. `ChannelWrappingStrategy` (rebases `channel` before any shot calculation).
+        3. `ShotWrappingStrategy` for `auto_shot_wrap` (streamer; `sail_line`).
+        4. `ShotWrappingStrategy` for `calculate_shot_index` (OBN; `shot_line`,
+           `always_calculate=True`).
+        5. `NonBinnedStrategy` or `DuplicateHandlingStrategy` (mutually exclusive;
+           `non_binned` wins when both are set).
 
         Args:
-            grid_overrides: Typed grid override configuration, or ``None`` for no
+            grid_overrides: Typed grid override configuration, or `None` for no
                 user-driven overrides.
-            synthesize_dims: Dimensions to synthesize if missing (e.g. ``component``).
+            synthesize_dims: Dimensions to synthesize if missing (e.g., `component`).
             template: Optional dataset template; used to look up coordinate names so
                 duplicate-handling counters group on dimension fields only.
 
         Returns:
-            A single :class:`IndexStrategy` instance. Returns
-            :class:`RegularGridStrategy` when no overrides and no synthesis are required.
+            A single `IndexStrategy` instance. Returns `RegularGridStrategy` when no
+            overrides and no synthesis are required.
         """
         strategies: list[IndexStrategy] = []
 

From bf39d7f216e7d1f71fedb99edf29cb72d5601694 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Thu, 4 Jun 2026 13:21:03 +0000
Subject: [PATCH 07/11] Make base `ingestion` module more generic and move segy
 specific vocabulary and requirements up the hierarchy

---
 src/mdio/ingestion/schema/models.py           |  6 ++----
 src/mdio/ingestion/segy/reader.py             |  7 ++++++-
 src/mdio/ingestion/segy/validation.py         |  5 ++++-
 tests/unit/ingestion/test_schema_resolver.py  | 11 ++++++-----
 tests/unit/ingestion/test_segy_reader.py      |  6 +++---
 .../ingestion/test_segy_spec_validation.py    | 19 ++++++++++++++++++-
 tests/unit/ingestion/test_segy_validation.py  | 18 +++++++++++++++++-
 7 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/src/mdio/ingestion/schema/models.py b/src/mdio/ingestion/schema/models.py
index cce198f9..e9d72b26 100644
--- a/src/mdio/ingestion/schema/models.py
+++ b/src/mdio/ingestion/schema/models.py
@@ -60,12 +60,10 @@ class ResolvedSchema(BaseModel):
     metadata: dict[str, Any] = Field(default_factory=dict)
     default_variable_name: str = "amplitude"
 
-    def required_header_fields(self) -> set[str]:
-        """Names that must be readable from SEG-Y trace headers to materialize this schema."""
+    def required_fields(self) -> set[str]:
+        """Names that must be readable from the source to materialize this schema."""
         fields = {dim.name for dim in self.dimensions if dim.is_spatial and not dim.is_calculated}
         fields.update(coord.name for coord in self.coordinates)
-        # coordinate_scalar is always needed to scale X/Y coordinates.
-        fields.add("coordinate_scalar")
         return fields
 
     def spatial_dimensions(self) -> list[DimensionSpec]:
diff --git a/src/mdio/ingestion/segy/reader.py b/src/mdio/ingestion/segy/reader.py
index 9acf4b30..990966c9 100644
--- a/src/mdio/ingestion/segy/reader.py
+++ b/src/mdio/ingestion/segy/reader.py
@@ -8,6 +8,7 @@
 from mdio.core.dimension import Dimension
 from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
 from mdio.segy.parsers import parse_headers
+from mdio.segy.scalar import SCALE_COORDINATE_KEYS
 
 if TYPE_CHECKING:
     import numpy as np
@@ -49,7 +50,11 @@ def read_index_headers(  # noqa: PLR0913
     spec_fields = {field.name for field in spec.trace.header.fields} if spec else set()
 
     # Drop any synthesized or missing dimensions/coordinates that aren't in the physical file spec
-    subset = tuple(f for f in schema.required_header_fields() if f in spec_fields)
+    required_fields = schema.required_fields()
+    if any(field in SCALE_COORDINATE_KEYS for field in required_fields):
+        required_fields = required_fields | {"coordinate_scalar"}
+
+    subset = tuple(f for f in required_fields if f in spec_fields)
 
     # 2. Parse headers
     parsed_headers = parse_headers(
diff --git a/src/mdio/ingestion/segy/validation.py b/src/mdio/ingestion/segy/validation.py
index f9a6d079..3828fda3 100644
--- a/src/mdio/ingestion/segy/validation.py
+++ b/src/mdio/ingestion/segy/validation.py
@@ -4,6 +4,8 @@
 
 from typing import TYPE_CHECKING
 
+from mdio.segy.scalar import SCALE_COORDINATE_KEYS
+
 if TYPE_CHECKING:
     from segy.schema import SegySpec
 
@@ -24,7 +26,8 @@ def validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatase
     if isinstance(mdio_template, Seismic3DObnReceiverGathersTemplate):
         required_fields.discard("component")
 
-    required_fields = required_fields | {"coordinate_scalar"}
+    if any(field in SCALE_COORDINATE_KEYS for field in required_fields):
+        required_fields = required_fields | {"coordinate_scalar"}
     missing_fields = required_fields - header_fields
 
     if missing_fields:
diff --git a/tests/unit/ingestion/test_schema_resolver.py b/tests/unit/ingestion/test_schema_resolver.py
index 64fc469a..ba748115 100644
--- a/tests/unit/ingestion/test_schema_resolver.py
+++ b/tests/unit/ingestion/test_schema_resolver.py
@@ -35,14 +35,15 @@ def test_obn_template_marks_shot_index_as_calculated(self) -> None:
         assert shot_index.is_calculated is True
         assert shot_index.is_spatial is True
 
-    def test_cdp_required_header_fields(self) -> None:
-        """Required header fields cover spatial dims, coordinates, and ``coordinate_scalar``."""
+    def test_cdp_required_fields(self) -> None:
+        """Required fields cover spatial dims and coordinates."""
         template = Seismic3DCdpGathersTemplate(data_domain="time", gather_domain="offset")
         schema = SchemaResolver().resolve(template, grid_overrides=None)
 
-        # Spatial dim header keys + coordinate header keys + always-present coordinate_scalar.
-        required = schema.required_header_fields()
-        assert {"inline", "crossline", "offset", "cdp_x", "cdp_y", "coordinate_scalar"}.issubset(required)
+        # Spatial dim keys + coordinate keys.
+        required = schema.required_fields()
+        assert {"inline", "crossline", "offset", "cdp_x", "cdp_y"}.issubset(required)
+        assert "coordinate_scalar" not in required
 
 
 class TestSchemaResolverNonBinned:
diff --git a/tests/unit/ingestion/test_segy_reader.py b/tests/unit/ingestion/test_segy_reader.py
index 5017902e..9aefc2f7 100644
--- a/tests/unit/ingestion/test_segy_reader.py
+++ b/tests/unit/ingestion/test_segy_reader.py
@@ -92,7 +92,7 @@ def test_subset_excludes_fields_absent_from_spec(self) -> None:
         )
         headers = make_header_array({"inline": np.array([1, 2], dtype=np.int32)})
         # spec lacks 'component' on purpose.
-        segy_file_kwargs = {"spec": _spec_with_fields("inline", "coordinate_scalar")}
+        segy_file_kwargs = {"spec": _spec_with_fields("inline")}
 
         with patch(f"{_READER}.parse_headers", return_value=headers) as mock_parse:
             reader.read_index_headers(
@@ -105,7 +105,7 @@ def test_subset_excludes_fields_absent_from_spec(self) -> None:
 
         subset = set(mock_parse.call_args.kwargs["subset"])
         assert "component" not in subset
-        assert subset == {"inline", "coordinate_scalar"}
+        assert subset == {"inline"}
 
     def test_synthesize_dims_produces_missing_dimension(self) -> None:
         """A synthesized dim absent from headers is created and yields a dimension."""
@@ -118,7 +118,7 @@ def test_synthesize_dims_produces_missing_dimension(self) -> None:
             coordinates=[],
         )
         headers = make_header_array({"receiver": np.array([5, 6, 7], dtype=np.uint32)})
-        segy_file_kwargs = {"spec": _spec_with_fields("receiver", "coordinate_scalar")}
+        segy_file_kwargs = {"spec": _spec_with_fields("receiver")}
 
         with patch(f"{_READER}.parse_headers", return_value=headers):
             indexed, dimensions = reader.read_index_headers(
diff --git a/tests/unit/ingestion/test_segy_spec_validation.py b/tests/unit/ingestion/test_segy_spec_validation.py
index 10115532..8a995f18 100644
--- a/tests/unit/ingestion/test_segy_spec_validation.py
+++ b/tests/unit/ingestion/test_segy_spec_validation.py
@@ -53,7 +53,7 @@ def test_validation_fails_with_missing_fields(self) -> None:
         assert "CustomTemplate" in error_message
 
     def test_validation_fails_with_missing_coordinate_scalar(self) -> None:
-        """Test that validation fails when coordinate_scalar is missing, even with all other fields."""
+        """Test that validation fails when coordinate_scalar is missing and fields require scaling."""
         template = MagicMock(spec=AbstractDatasetTemplate)
         template.name = "TestTemplate"
         template.spatial_dimension_names = ("inline", "crossline")
@@ -73,3 +73,20 @@ def test_validation_fails_with_missing_coordinate_scalar(self) -> None:
         error_message = str(exc_info.value)
         assert "coordinate_scalar" in error_message
         assert "TestTemplate" in error_message
+
+    def test_validation_passes_with_missing_coordinate_scalar_when_not_needed(self) -> None:
+        """Test that validation passes when coordinate_scalar is missing but no fields require scaling."""
+        template = MagicMock(spec=AbstractDatasetTemplate)
+        template.name = "TestTemplate"
+        template.spatial_dimension_names = ("inline", "crossline")
+        template.coordinate_names = ("source_to_receiver_distance",)
+
+        # Create SegySpec with all standard fields except coordinate_scalar
+        spec = get_segy_standard(1.0)
+        # Remove coordinate_scalar from the standard fields
+        standard_fields = [field for field in spec.trace.header.fields if field.name != "coordinate_scalar"]
+        standard_fields.append(HeaderField(name="not_coordinate_scalar", byte=71, format="int16"))
+        segy_spec = spec.customize(trace_header_fields=standard_fields)
+
+        # Should not raise any exception
+        validate_spec_in_template(segy_spec, template)
diff --git a/tests/unit/ingestion/test_segy_validation.py b/tests/unit/ingestion/test_segy_validation.py
index 8159ec51..62a73589 100644
--- a/tests/unit/ingestion/test_segy_validation.py
+++ b/tests/unit/ingestion/test_segy_validation.py
@@ -48,7 +48,7 @@ def test_missing_fields_listed_in_error(self) -> None:
         assert "CustomTemplate" in msg
 
     def test_missing_coordinate_scalar_raises(self) -> None:
-        """A spec without ``coordinate_scalar`` must always fail."""
+        """A spec without ``coordinate_scalar`` must fail if fields require scaling."""
         template = MagicMock(spec=AbstractDatasetTemplate)
         template.name = "TestTemplate"
         template.spatial_dimension_names = ("inline", "crossline")
@@ -63,6 +63,22 @@ def test_missing_coordinate_scalar_raises(self) -> None:
         with pytest.raises(ValueError, match=r"coordinate_scalar"):
             validate_spec_in_template(spec, template)
 
+    def test_missing_coordinate_scalar_passes_if_not_needed(self) -> None:
+        """A spec without ``coordinate_scalar`` passes if no fields require scaling."""
+        template = MagicMock(spec=AbstractDatasetTemplate)
+        template.name = "TestTemplate"
+        template.spatial_dimension_names = ("inline", "crossline")
+        template.coordinate_names = ("source_to_receiver_distance",)
+        template.calculated_dimension_names = ()
+
+        spec = get_segy_standard(1.0)
+        kept = [f for f in spec.trace.header.fields if f.name != "coordinate_scalar"]
+        kept.append(HeaderField(name="not_coordinate_scalar", byte=71, format="int16"))
+        spec = spec.customize(trace_header_fields=kept)
+
+        # Should not raise ValueError because 'offset' is not in SCALE_COORDINATE_KEYS
+        validate_spec_in_template(spec, template)
+
     def test_calculated_dimensions_are_not_required(self) -> None:
         """Dimensions in ``calculated_dimension_names`` should not be required from the spec."""
         template = MagicMock(spec=AbstractDatasetTemplate)

From 922d1da1ed7475277911dcea53f528b3b81b5c2a Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Thu, 4 Jun 2026 14:01:32 +0000
Subject: [PATCH 08/11] Fix public interface to be `segy_to_mdio`

---
 src/mdio/__init__.py                | 2 --
 src/mdio/converters/segy.py         | 5 +++--
 src/mdio/ingestion/__init__.py      | 4 ++--
 src/mdio/ingestion/segy/pipeline.py | 4 ++--
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/mdio/__init__.py b/src/mdio/__init__.py
index bfae0f30..efd76492 100644
--- a/src/mdio/__init__.py
+++ b/src/mdio/__init__.py
@@ -9,7 +9,6 @@
 from mdio.converters import mdio_to_segy
 from mdio.converters import segy_to_mdio
 from mdio.ingestion import ResolvedSchema
-from mdio.ingestion import run_segy_ingestion
 from mdio.optimize.access_pattern import OptimizedAccessPatternConfig
 from mdio.optimize.access_pattern import optimize_access_patterns
 from mdio.segy.geometry import GridOverrides
@@ -29,6 +28,5 @@
     "segy_to_mdio",
     "OptimizedAccessPatternConfig",
     "optimize_access_patterns",
-    "run_segy_ingestion",
     "ResolvedSchema",
 ]
diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py
index 8692b40e..42ee7d7b 100644
--- a/src/mdio/converters/segy.py
+++ b/src/mdio/converters/segy.py
@@ -5,7 +5,6 @@
 import logging
 from typing import TYPE_CHECKING
 
-from mdio.ingestion.segy.pipeline import run_segy_ingestion
 from mdio.segy.geometry import GridOverrides
 
 logger = logging.getLogger(__name__)
@@ -67,7 +66,9 @@ def segy_to_mdio(  # noqa: PLR0913
     """
     typed_grid_overrides = _coerce_grid_overrides(grid_overrides)
 
-    return run_segy_ingestion(
+    from mdio.ingestion.segy.pipeline import segy_to_mdio as _ingest_segy_to_mdio  # noqa: PLC0415
+
+    return _ingest_segy_to_mdio(
         segy_spec=segy_spec,
         mdio_template=mdio_template,
         input_path=input_path,
diff --git a/src/mdio/ingestion/__init__.py b/src/mdio/ingestion/__init__.py
index 6624f7e6..91047c18 100644
--- a/src/mdio/ingestion/__init__.py
+++ b/src/mdio/ingestion/__init__.py
@@ -8,12 +8,12 @@
 from mdio.ingestion.schema import ResolvedSchema
 from mdio.ingestion.segy.index_strategies import IndexStrategy
 from mdio.ingestion.segy.index_strategies import IndexStrategyRegistry
-from mdio.ingestion.segy.pipeline import run_segy_ingestion
+from mdio.ingestion.segy.pipeline import segy_to_mdio
 
 __all__ = [
     "DimensionSpec",
     "IndexStrategy",
     "IndexStrategyRegistry",
     "ResolvedSchema",
-    "run_segy_ingestion",
+    "segy_to_mdio",
 ]
diff --git a/src/mdio/ingestion/segy/pipeline.py b/src/mdio/ingestion/segy/pipeline.py
index 8dd69706..0752d360 100644
--- a/src/mdio/ingestion/segy/pipeline.py
+++ b/src/mdio/ingestion/segy/pipeline.py
@@ -112,7 +112,7 @@ def _build_grid(dimensions: list[Dimension], indexed_headers: np.ndarray, num_tr
     return grid
 
 
-def run_segy_ingestion(  # noqa: PLR0913
+def segy_to_mdio(  # noqa: PLR0913
     segy_spec: SegySpec,
     mdio_template: AbstractDatasetTemplate,
     input_path: UPath | Path | str,
@@ -135,7 +135,7 @@ def run_segy_ingestion(  # noqa: PLR0913
         grid_overrides: Grid override configuration for non-standard geometries.
         segy_header_overrides: Option to override specific SEG-Y headers during ingestion.
 
-    Raises:
+    Raises:  # noqa: DOC502
         FileExistsError: If the output location already exists and overwrite is False.
         ValueError: If required fields are missing from segy_spec or required computed
             dimensions are not produced after grid overrides are applied.

From cae31bb90cc6225f088f7ebf22ee504befd78a05 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Thu, 4 Jun 2026 14:06:35 +0000
Subject: [PATCH 09/11] Update raw headers warning with phase-out plan

---
 src/mdio/ingestion/segy/raw_headers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mdio/ingestion/segy/raw_headers.py b/src/mdio/ingestion/segy/raw_headers.py
index fea93562..fff55498 100644
--- a/src/mdio/ingestion/segy/raw_headers.py
+++ b/src/mdio/ingestion/segy/raw_headers.py
@@ -48,7 +48,7 @@ def build_raw_header_variables(schema: ResolvedSchema) -> list[dict[str, Any]]:
         logger.warning("Raw headers are only supported for Zarr v3. Skipping raw headers.")
         return []
 
-    logger.warning("MDIO__IMPORT__RAW_HEADERS is experimental and expected to change or be removed.")
+    logger.warning("MDIO__IMPORT__RAW_HEADERS is planned to be deprecated in release 1.3 and removed in release 1.4.")
     spatial_dim_names = tuple(dim.name for dim in schema.dimensions if dim.is_spatial)
     chunk_metadata = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=schema.chunk_shape[:-1]))
     return [

From 21490a38f792bc8582fb1ef86a15b3a4fa68a170 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Thu, 4 Jun 2026 16:04:56 +0000
Subject: [PATCH 10/11] Cleanup issues

---
 docs/tutorials/custom_template.ipynb          | 608 +++++++++---------
 src/mdio/builder/templates/base.py            |  56 +-
 src/mdio/builder/templates/seismic_3d_obn.py  |   4 +-
 src/mdio/converters/segy.py                   |   7 +-
 src/mdio/ingestion/segy/index_strategies.py   |  22 +-
 src/mdio/ingestion/segy/pipeline.py           |   2 +
 src/mdio/ingestion/segy/reader.py             |   4 +
 src/mdio/segy/geometry.py                     |  43 +-
 .../ingestion/test_segy_grid_overrides.py     |  24 +-
 .../ingestion/test_segy_index_strategies.py   |  20 +-
 tests/unit/ingestion/test_segy_pipeline.py    |  45 ++
 11 files changed, 446 insertions(+), 389 deletions(-)

diff --git a/docs/tutorials/custom_template.ipynb b/docs/tutorials/custom_template.ipynb
index 32efda96..520bc3a8 100644
--- a/docs/tutorials/custom_template.ipynb
+++ b/docs/tutorials/custom_template.ipynb
@@ -1,302 +1,310 @@
 {
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "85114119ae7a4db0",
-   "metadata": {},
-   "source": [
-    "# MDIO Template Usage\n",
-    "\n",
-    "```{article-info}\n",
-    ":author: Altay Sansal\n",
-    ":date: \"{sub-ref}`today`\"\n",
-    ":read-time: \"{sub-ref}`wordcount-minutes` min read\"\n",
-    ":class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light\n",
-    "```\n",
-    "\n",
-    "```{warning}\n",
-    "Most SEG-Y files correspond to standard seismic data types or field configurations. We recommend using\n",
-    "the built-in templates from the registry whenever possible. Create a custom template only when your file\n",
-    "is unusual and cannot be represented by existing templates. In many cases, you can simply customize the\n",
-    "SEG-Y header byte mapping during ingestion without defining a new template.\n",
-    "```\n",
-    "\n",
-    "In this tutorial we will walk through the Template Registry and show how to:\n",
-    "\n",
-    "- Discover available templates in the registry\n",
-    "- Define and register your own template\n",
-    "- Build a dataset model and convert it to an Xarray Dataset using your custom template\n",
-    "\n",
-    "If this is your first time with MDIO, you may want to skim the Quickstart first."
-   ]
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# MDIO Template Usage\n",
+        "\n",
+        "```{article-info}\n",
+        ":author: Altay Sansal\n",
+        ":date: \"{sub-ref}`today`\"\n",
+        ":read-time: \"{sub-ref}`wordcount-minutes` min read\"\n",
+        ":class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light\n",
+        "```\n",
+        "\n",
+        "```{warning}\n",
+        "Most SEG-Y files correspond to standard seismic data types or field configurations. We recommend using\n",
+        "the built-in templates from the registry whenever possible. Create a custom template only when your file\n",
+        "is unusual and cannot be represented by existing templates. In many cases, you can simply customize the\n",
+        "SEG-Y header byte mapping during ingestion without defining a new template.\n",
+        "```\n",
+        "\n",
+        "In this tutorial we will walk through the Template Registry and show how to:\n",
+        "\n",
+        "- Discover available templates in the registry\n",
+        "- Define and register your own template\n",
+        "- Build a dataset model and convert it to an Xarray Dataset using your custom template\n",
+        "\n",
+        "If this is your first time with MDIO, you may want to skim the Quickstart first."
+      ],
+      "id": "85114119ae7a4db0"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## What is a Template and a Template Registry?\n",
+        "\n",
+        "A template defines how an MDIO dataset is structured: names of dimensions and coordinates, the default variable name, chunking hints, and attributes to be stored. Since many seismic datasets share common structures (e.g., 3D post-stack, 2D post-stack, pre-stack CDP/shot, etc.), MDIO ships with a pre-populated template registry and APIs to fetch or register templates.\n",
+        "\n",
+        "Fetching a template from it returns a copied instance you can freely customize without affecting others."
+      ],
+      "id": "a793f2cfb58f09cc"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "from mdio.builder.template_registry import get_template\n",
+        "from mdio.builder.template_registry import get_template_registry\n",
+        "from mdio.builder.template_registry import list_templates\n",
+        "\n",
+        "registry = get_template_registry()\n",
+        "registry  # pretty HTML in notebooks"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "c7a760a019930d4e"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "We can list all registered templates and get a list as well."
+      ],
+      "id": "810dbba2b6dba787"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "list_templates()"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "38eb1da635c7be0f"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Defining a Minimal Custom Template\n",
+        "\n",
+        "To define a custom template, subclass `AbstractDatasetTemplate` and set:\n",
+        "\n",
+        "- `_name`: a public name for the template\n",
+        "- `_dim_names`: names for each axis of your data variable (the last axis is the trace/time or trace/depth axis)\n",
+        "- `_physical_coord_names` and `_logical_coord_names`: optional additional coordinate variables to store along the spatial grid\n",
+        "- `_load_dataset_attributes()`: optional attributes stored at the dataset level\n",
+        "\n",
+        "Below we create a special template that can hold interval velocity field with multiple anisotropy parameters for a depth seismic volume.\n",
+        "\n",
+        "The dimensions, dimension-coordinates and non-dimension coordinates will automatically get created using the method\n",
+        "from the base class. However, since we want more variables, we override `_add_variables` to add them."
+      ],
+      "id": "d87bd9ec781a8a8e"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "from mdio.builder.schemas import compressors\n",
+        "from mdio.builder.schemas.chunk_grid import RegularChunkGrid\n",
+        "from mdio.builder.schemas.chunk_grid import RegularChunkShape\n",
+        "from mdio.builder.schemas.dtype import ScalarType\n",
+        "from mdio.builder.schemas.v1.variable import VariableMetadata\n",
+        "from mdio.builder.templates.base import AbstractDatasetTemplate\n",
+        "\n",
+        "\n",
+        "class AnisotropicVelocityTemplate(AbstractDatasetTemplate):\n",
+        "    \"\"\"A custom template that has unusual dimensions and coordinates.\"\"\"\n",
+        "\n",
+        "    def __init__(self, data_domain: str = \"depth\") -> None:\n",
+        "        super().__init__(data_domain)\n",
+        "        # Dimension order matters; the last dimension is the depth\n",
+        "        self._dim_names = (\"inline\", \"crossline\", self.trace_domain)\n",
+        "        # Additional coordinates: these are added on top of dimension coordinates\n",
+        "        self._physical_coord_names = (\"cdp_x\", \"cdp_y\")\n",
+        "        self._var_chunk_shape = (128, 128, 128)\n",
+        "        self._units = {}\n",
+        "\n",
+        "    @property\n",
+        "    def _name(self) -> str:  # public name for the registry\n",
+        "        return \"AnisotropicVelocity3DDepth\"\n",
+        "\n",
+        "    @property\n",
+        "    def _default_variable_name(self) -> str:  # public name for the registry\n",
+        "        return \"velocity\"\n",
+        "\n",
+        "    def _load_dataset_attributes(self) -> dict:\n",
+        "        return {\"surveyType\": \"3D\", \"gatherType\": \"line\"}\n",
+        "\n",
+        "    def _add_variables(self) -> None:\n",
+        "        \"\"\"Add the variables including default and extra.\"\"\"\n",
+        "        for name in [\"velocity\", \"epsilon\", \"delta\"]:\n",
+        "            chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self.full_chunk_shape))\n",
+        "            unit = self.get_unit_by_key(name)\n",
+        "            self._builder.add_variable(\n",
+        "                name=name,\n",
+        "                dimensions=self._dim_names,\n",
+        "                data_type=ScalarType.FLOAT32,\n",
+        "                compressor=compressors.Blosc(cname=compressors.BloscCname.zstd),\n",
+        "                coordinates=self.physical_coordinate_names,\n",
+        "                metadata=VariableMetadata(chunk_grid=chunk_grid, units_v1=unit),\n",
+        "            )\n",
+        "\n",
+        "\n",
+        "AnisotropicVelocityTemplate()"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "cfc9d9b0e1b67a76"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Registering the Custom Template\n",
+        "\n",
+        "The registry returns a deep copy of the template on every fetch. To make the template discoverable by name, register it first, then retrieve it with `get_template`."
+      ],
+      "id": "15e61310ed0ffd97"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "from mdio.builder.template_registry import register_template\n",
+        "\n",
+        "register_template(AnisotropicVelocityTemplate())\n",
+        "print(\"Registered:\", \"AnisotropicVelocity3DDepth\" in list_templates())\n",
+        "\n",
+        "custom_template = get_template(\"AnisotropicVelocity3DDepth\")\n",
+        "custom_template"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "a4e1847b20da6768"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "You can also set units at any time. For this demo we’ll set metric units. The spatial units will be inferred from the SEG-Y binary header during ingestion, but we can override them here. Ingestion will honor what is in the template."
+      ],
+      "id": "83b0772f1913c652"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "from mdio.builder.schemas.v1.units import LengthUnitModel\n",
+        "from mdio.builder.schemas.v1.units import SpeedUnitModel\n",
+        "\n",
+        "custom_template.add_units(\n",
+        "    {\n",
+        "        \"depth\": LengthUnitModel(length=\"m\"),\n",
+        "        \"cdp_x\": LengthUnitModel(length=\"m\"),\n",
+        "        \"cdp_y\": LengthUnitModel(length=\"m\"),\n",
+        "        \"velocity\": SpeedUnitModel(speed=\"m/s\"),\n",
+        "    }\n",
+        ")\n",
+        "custom_template"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "d7dca50d72d2f93"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Changing chunk size (chunks) on an existing template\n",
+        "\n",
+        "Often you will want to tweak the chunking strategy for performance. You can do this in two ways:\n",
+        "\n",
+        "- When defining a subclass, set a default in the constructor (e.g., `self._var_chunk_shape = (...)`).\n",
+        "- On an existing template instance, assign to the `full_chunk_shape` property once you know your final\n",
+        "  dataset sizes (the tuple length must match the number of data dimensions).\n",
+        "\n",
+        "Below is a tiny demo showing how to modify the chunk shape on a fetched template. We first build the\n",
+        "template with known sizes to satisfy validation, then update `full_chunk_shape`.\n",
+        "\n",
+        "```{warning}\n",
+        "`AbstractDatasetTemplate.build_dataset` is deprecated as of 1.2 and is planned for removal in 1.2.5.\n",
+        "SEG-Y ingestion now builds datasets from a resolved schema via the schema-driven factory, so prefer\n",
+        "`mdio.segy_to_mdio` for end-to-end conversion. The `build_dataset` call below remains only to\n",
+        "illustrate the in-memory dataset model.\n",
+        "```\n",
+        "\n",
+        "```{note}\n",
+        "In the SEG-Y to MDIO conversion workflow, MDIO infers the final grid shape from the SEG-Y headers. It’s\n",
+        "common to set or adjust `full_chunk_shape` right before calling `segy_to_mdio`, using the same sizes\n",
+        "you expect for the final array.\n",
+        "```"
+      ],
+      "id": "367ade9824e72bc3"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# NOTE: build_dataset is deprecated (removal planned for 1.2.5); shown here for illustration only.\n",
+        "mdio_ds = custom_template.build_dataset(name=\"demo-only\", sizes=(300, 500, 1001))\n",
+        "# pick smaller chunks than the full array for better parallelism and IO\n",
+        "custom_template.full_chunk_shape = (64, 64, 64)\n",
+        "print(\"Chunk shape set to:\", custom_template.full_chunk_shape)\n",
+        "\n",
+        "custom_template"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "75939231b58c204a"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Making Dummy Xarray Dataset\n",
+        "\n",
+        "We can now take the MDIO Dataset model and convert it to Xarray with our configuration. If ingesting from SEG-Y, this step\n",
+        "gets executed automatically by the converter before populating the data.\n",
+        "\n",
+        "Note that the whole dataset will be populated with the fill values."
+      ],
+      "id": "a76f17cdf235de13"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "from mdio.builder.xarray_builder import to_xarray_dataset\n",
+        "\n",
+        "to_xarray_dataset(mdio_ds)"
+      ],
+      "execution_count": null,
+      "outputs": [],
+      "id": "ce3dcf9c7946ea07"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Recap: Key APIs Used\n",
+        "\n",
+        "- Template registry helpers: `get_template_registry`, `list_templates`, `register_template`, `get_template`\n",
+        "- Base template to subclass: `AbstractDatasetTemplate`\n",
+        "- Make Xarray Dataset from MDIO Data Model: `to_xarray_dataset`\n",
+        "\n",
+        "With these pieces, you can standardize how your seismic data is represented in MDIO and keep ingestion code concise and repeatable.\n"
+      ],
+      "id": "fc05aa3c81f8465c"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [],
+      "execution_count": null,
+      "outputs": [],
+      "id": "a15848ab5c0811d6"
+    }
+  ],
+  "metadata": {
+    "mystnb": {
+      "execution_mode": "force"
+    }
   },
-  {
-   "cell_type": "markdown",
-   "id": "a793f2cfb58f09cc",
-   "metadata": {},
-   "source": [
-    "## What is a Template and a Template Registry?\n",
-    "\n",
-    "A template defines how an MDIO dataset is structured: names of dimensions and coordinates, the default variable name, chunking hints, and attributes to be stored. Since many seismic datasets share common structures (e.g., 3D post-stack, 2D post-stack, pre-stack CDP/shot, etc.), MDIO ships with a pre-populated template registry and APIs to fetch or register templates.\n",
-    "\n",
-    "Fetching a template from it returns a copied instance you can freely customize without affecting others."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c7a760a019930d4e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from mdio.builder.template_registry import get_template\n",
-    "from mdio.builder.template_registry import get_template_registry\n",
-    "from mdio.builder.template_registry import list_templates\n",
-    "\n",
-    "registry = get_template_registry()\n",
-    "registry  # pretty HTML in notebooks"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "810dbba2b6dba787",
-   "metadata": {},
-   "source": [
-    "We can list all registered templates and get a list as well."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "38eb1da635c7be0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "list_templates()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d87bd9ec781a8a8e",
-   "metadata": {},
-   "source": [
-    "## Defining a Minimal Custom Template\n",
-    "\n",
-    "To define a custom template, subclass `AbstractDatasetTemplate` and set:\n",
-    "\n",
-    "- `_name`: a public name for the template\n",
-    "- `_dim_names`: names for each axis of your data variable (the last axis is the trace/time or trace/depth axis)\n",
-    "- `_physical_coord_names` and `_logical_coord_names`: optional additional coordinate variables to store along the spatial grid\n",
-    "- `_load_dataset_attributes()`: optional attributes stored at the dataset level\n",
-    "\n",
-    "Below we create a special template that can hold interval velocity field with multiple anisotropy parameters for a depth seismic volume.\n",
-    "\n",
-    "The dimensions, dimension-coordinates and non-dimension coordinates will automatically get created using the method\n",
-    "from the base class. However, since we want more variables, we override `_add_variables` to add them."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cfc9d9b0e1b67a76",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from mdio.builder.schemas import compressors\n",
-    "from mdio.builder.schemas.chunk_grid import RegularChunkGrid\n",
-    "from mdio.builder.schemas.chunk_grid import RegularChunkShape\n",
-    "from mdio.builder.schemas.dtype import ScalarType\n",
-    "from mdio.builder.schemas.v1.variable import VariableMetadata\n",
-    "from mdio.builder.templates.base import AbstractDatasetTemplate\n",
-    "\n",
-    "\n",
-    "class AnisotropicVelocityTemplate(AbstractDatasetTemplate):\n",
-    "    \"\"\"A custom template that has unusual dimensions and coordinates.\"\"\"\n",
-    "\n",
-    "    def __init__(self, data_domain: str = \"depth\") -> None:\n",
-    "        super().__init__(data_domain)\n",
-    "        # Dimension order matters; the last dimension is the depth\n",
-    "        self._dim_names = (\"inline\", \"crossline\", self.trace_domain)\n",
-    "        # Additional coordinates: these are added on top of dimension coordinates\n",
-    "        self._physical_coord_names = (\"cdp_x\", \"cdp_y\")\n",
-    "        self._var_chunk_shape = (128, 128, 128)\n",
-    "        self._units = {}\n",
-    "\n",
-    "    @property\n",
-    "    def _name(self) -> str:  # public name for the registry\n",
-    "        return \"AnisotropicVelocity3DDepth\"\n",
-    "\n",
-    "    @property\n",
-    "    def _default_variable_name(self) -> str:  # public name for the registry\n",
-    "        return \"velocity\"\n",
-    "\n",
-    "    def _load_dataset_attributes(self) -> dict:\n",
-    "        return {\"surveyType\": \"3D\", \"gatherType\": \"line\"}\n",
-    "\n",
-    "    def _add_variables(self) -> None:\n",
-    "        \"\"\"Add the variables including default and extra.\"\"\"\n",
-    "        for name in [\"velocity\", \"epsilon\", \"delta\"]:\n",
-    "            chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self.full_chunk_shape))\n",
-    "            unit = self.get_unit_by_key(name)\n",
-    "            self._builder.add_variable(\n",
-    "                name=name,\n",
-    "                dimensions=self._dim_names,\n",
-    "                data_type=ScalarType.FLOAT32,\n",
-    "                compressor=compressors.Blosc(cname=compressors.BloscCname.zstd),\n",
-    "                coordinates=self.physical_coordinate_names,\n",
-    "                metadata=VariableMetadata(chunk_grid=chunk_grid, units_v1=unit),\n",
-    "            )\n",
-    "\n",
-    "\n",
-    "AnisotropicVelocityTemplate()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "15e61310ed0ffd97",
-   "metadata": {},
-   "source": [
-    "## Registering the Custom Template\n",
-    "\n",
-    "The registry returns a deep copy of the template on every fetch. To make the template discoverable by name, register it first, then retrieve it with `get_template`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a4e1847b20da6768",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from mdio.builder.template_registry import register_template\n",
-    "\n",
-    "register_template(AnisotropicVelocityTemplate())\n",
-    "print(\"Registered:\", \"AnisotropicVelocity3DDepth\" in list_templates())\n",
-    "\n",
-    "custom_template = get_template(\"AnisotropicVelocity3DDepth\")\n",
-    "custom_template"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "83b0772f1913c652",
-   "metadata": {},
-   "source": [
-    "You can also set units at any time. For this demo we’ll set metric units. The spatial units will be inferred from the SEG-Y binary header during ingestion, but we can override them here. Ingestion will honor what is in the template."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d7dca50d72d2f93",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from mdio.builder.schemas.v1.units import LengthUnitModel\n",
-    "from mdio.builder.schemas.v1.units import SpeedUnitModel\n",
-    "\n",
-    "custom_template.add_units(\n",
-    "    {\n",
-    "        \"depth\": LengthUnitModel(length=\"m\"),\n",
-    "        \"cdp_x\": LengthUnitModel(length=\"m\"),\n",
-    "        \"cdp_y\": LengthUnitModel(length=\"m\"),\n",
-    "        \"velocity\": SpeedUnitModel(speed=\"m/s\"),\n",
-    "    }\n",
-    ")\n",
-    "custom_template"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "367ade9824e72bc3",
-   "metadata": {},
-   "source": [
-    "## Changing chunk size (chunks) on an existing template\n",
-    "\n",
-    "Often you will want to tweak the chunking strategy for performance. You can do this in two ways:\n",
-    "\n",
-    "- When defining a subclass, set a default in the constructor (e.g., `self._var_chunk_shape = (...)`).\n",
-    "- On an existing template instance, assign to the `full_chunk_shape` property once you know your final\n",
-    "  dataset sizes (the tuple length must match the number of data dimensions).\n",
-    "\n",
-    "Below is a tiny demo showing how to modify the chunk shape on a fetched template. We first build the\n",
-    "template with known sizes to satisfy validation, then update `full_chunk_shape`.\n",
-    "\n",
-    "```{note}\n",
-    "In the SEG-Y to MDIO conversion workflow, MDIO infers the final grid shape from the SEG-Y headers. It’s\n",
-    "common to set or adjust `full_chunk_shape` right before calling `segy_to_mdio`, using the same sizes\n",
-    "you expect for the final array.\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "75939231b58c204a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mdio_ds = custom_template.build_dataset(name=\"demo-only\", sizes=(300, 500, 1001))\n",
-    "# pick smaller chunks than the full array for better parallelism and IO\n",
-    "custom_template.full_chunk_shape = (64, 64, 64)\n",
-    "print(\"Chunk shape set to:\", custom_template.full_chunk_shape)\n",
-    "\n",
-    "custom_template"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a76f17cdf235de13",
-   "metadata": {},
-   "source": [
-    "## Making Dummy Xarray Dataset\n",
-    "\n",
-    "We can now take the MDIO Dataset model and convert it to Xarray with our configuration. If ingesting from SEG-Y, this step\n",
-    "gets executed automatically by the converter before populating the data.\n",
-    "\n",
-    "Note that the whole dataset will be populated with the fill values."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ce3dcf9c7946ea07",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from mdio.builder.xarray_builder import to_xarray_dataset\n",
-    "\n",
-    "to_xarray_dataset(mdio_ds)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "fc05aa3c81f8465c",
-   "metadata": {},
-   "source": [
-    "## Recap: Key APIs Used\n",
-    "\n",
-    "- Template registry helpers: `get_template_registry`, `list_templates`, `register_template`, `get_template`\n",
-    "- Base template to subclass: `AbstractDatasetTemplate`\n",
-    "- Make Xarray Dataset from MDIO Data Model: `to_xarray_dataset`\n",
-    "\n",
-    "With these pieces, you can standardize how your seismic data is represented in MDIO and keep ingestion code concise and repeatable.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a15848ab5c0811d6",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "mystnb": {
-   "execution_mode": "force"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/src/mdio/builder/templates/base.py b/src/mdio/builder/templates/base.py
index ed23668b..8e82db1d 100644
--- a/src/mdio/builder/templates/base.py
+++ b/src/mdio/builder/templates/base.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import logging
 from abc import ABC
 from abc import abstractmethod
 from typing import TYPE_CHECKING
@@ -24,6 +25,8 @@
     from mdio.builder.schemas.v1.dataset import Dataset
     from mdio.builder.templates.types import SeismicDataDomain
 
+logger = logging.getLogger(__name__)
+
 
 class AbstractDatasetTemplate(ABC):
     """Abstract base class that defines the template method for Dataset building factory.
@@ -46,12 +49,6 @@ def __init__(self, data_domain: SeismicDataDomain) -> None:
         self._var_chunk_shape: tuple[int, ...] = ()
         self.synthesize_missing_dims: tuple[str, ...] = ()
 
-        # TEMPORARY (removed with declare_coordinate_specs): set when grid overrides mutate this
-        # template in-place (dims collapsed into 'trace', extra coordinates added). Once mutated,
-        # the runtime layout intentionally diverges from the static declare_coordinate_specs()
-        # contract, so the drift guard in build_dataset() must not run.
-        self._grid_overrides_applied: bool = False
-
         self._builder: MDIODatasetBuilder | None = None
         self._dim_sizes: tuple[int, ...] = ()
         self._units: dict[str, AllUnitModel] = {}
@@ -154,6 +151,12 @@ def build_dataset(
     ) -> Dataset:
         """Template method that builds the dataset.
 
+        .. deprecated:: 1.2
+            ``build_dataset`` is deprecated and is planned for removal in 1.2.5. SEG-Y
+            ingestion now builds datasets from a resolved schema via the schema-driven
+            factory (:func:`mdio.ingestion.dataset_factory.build_mdio_dataset`); use
+            :func:`mdio.segy_to_mdio` for ingestion.
+
         Args:
             name: The name of the dataset.
             sizes: The sizes of the dimensions.
@@ -165,6 +168,11 @@ def build_dataset(
         Raises:
             ValueError: If coordinate already exists from subclass override.
         """
+        logger.warning(
+            "AbstractDatasetTemplate.build_dataset is deprecated as of 1.2 and is planned for "
+            "removal in 1.2.5; SEG-Y ingestion builds datasets via the schema-driven factory. "
+            "Use `mdio.segy_to_mdio` for ingestion."
+        )
         self._dim_sizes = sizes
 
         attributes = self._load_dataset_attributes() or {}
@@ -186,10 +194,7 @@ def build_dataset(
             except ValueError as exc:  # coordinate may already exist
                 if "same name twice" not in str(exc):
                     raise
-        # Skip the static drift guard when grid overrides have transformed the template: the
-        # runtime layout no longer matches the declared (override-free) specs by design.
-        if not self._grid_overrides_applied:
-            self._validate_declared_coordinate_specs()
+        self._validate_declared_coordinate_specs()
         self._add_variables()
         self._add_trace_mask()
 
@@ -206,30 +211,6 @@ def add_units(self, units: dict[str, AllUnitModel]) -> None:
                 raise ValueError(msg)
         self._units |= units
 
-    def apply_resolved_dimensions(
-        self,
-        dim_names: tuple[str, ...],
-        chunk_shape: tuple[int, ...],
-    ) -> None:
-        """Update the template's dimension layout from a resolved schema.
-
-        Supported entry point for the ingestion pipeline to push back dimension names
-        and chunk shape after the SchemaResolver has applied grid overrides
-        (e.g. NonBinned, HasDuplicates), instead of mutating private attributes.
-
-        Args:
-            dim_names: Final ordered dimension names.
-            chunk_shape: Chunk shape matching ``dim_names`` length.
-
-        Raises:
-            ValueError: If ``len(chunk_shape) != len(dim_names)``.
-        """
-        if len(chunk_shape) != len(dim_names):
-            msg = f"chunk_shape length {len(chunk_shape)} does not match dim_names length {len(dim_names)}"
-            raise ValueError(msg)
-        self._dim_names = tuple(dim_names)
-        self._var_chunk_shape = tuple(chunk_shape)
-
     def _validate_declared_coordinate_specs(self) -> None:
         """Fail the build if :meth:`declare_coordinate_specs` drifted from the built coordinates.
 
@@ -238,11 +219,8 @@ def _validate_declared_coordinate_specs(self) -> None:
         :meth:`_add_coordinates`, this guard ensures the two never diverge in name, dimensions,
         or dtype. The ingestion ``SchemaResolver`` trusts the declared specs, so silent drift
         would corrupt resolved schemas. The check runs for every template (built-in and
-        user-defined) on every ``build_dataset`` call that does not apply grid overrides. Grid
-        overrides mutate the template in-place (collapsing dims into ``trace`` and adding
-        coordinates), so the runtime layout intentionally diverges from the declared specs and
-        the guard is skipped for those builds. It is removed once ``_add_coordinates`` is derived
-        from the resolved schema and the duplication no longer exists.
+        user-defined) on every ``build_dataset`` call. It is removed once ``_add_coordinates``
+        is derived from the resolved schema and the duplication no longer exists.
 
         Raises:
             ValueError: If the declared specs do not match the built non-dimension coordinates.
diff --git a/src/mdio/builder/templates/seismic_3d_obn.py b/src/mdio/builder/templates/seismic_3d_obn.py
index de58ae53..4ed6c334 100644
--- a/src/mdio/builder/templates/seismic_3d_obn.py
+++ b/src/mdio/builder/templates/seismic_3d_obn.py
@@ -25,8 +25,8 @@ class Seismic3DObnReceiverGathersTemplate(AbstractDatasetTemplate):
     Special handling for component dimension:
         If the SEG-Y spec does not contain a 'component' field, the ingestion process
         will automatically synthesize a component dimension with constant value 1 for
-        all traces. A warning is logged when this occurs. This is handled explicitly
-        in GridOverrider._synthesize_obn_component().
+        all traces. A warning is logged when this occurs. This is driven by
+        ``synthesize_missing_dims`` and handled by ``ComponentSynthesisStrategy``.
     """
 
     def __init__(self, data_domain: SeismicDataDomain = "time"):
diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py
index 42ee7d7b..d116799a 100644
--- a/src/mdio/converters/segy.py
+++ b/src/mdio/converters/segy.py
@@ -35,8 +35,8 @@ def _coerce_grid_overrides(
         return grid_overrides
 
     logger.warning(
-        "Passing `grid_overrides` as a dict is deprecated and will be removed in a "
-        "future release; pass a `mdio.GridOverrides` instance instead."
+        "Passing `grid_overrides` as a dict is deprecated as of 1.2 and is planned for removal "
+        "in 1.2.5; pass a `mdio.GridOverrides` instance instead."
     )
     return GridOverrides.model_validate(grid_overrides)
 
@@ -61,7 +61,8 @@ def segy_to_mdio(  # noqa: PLR0913
         output_path: The universal path for the output MDIO v1 file.
         overwrite: Whether to overwrite the output file if it already exists. Defaults to False.
         grid_overrides: Option to add grid overrides. Prefer a :class:`mdio.GridOverrides`
-            instance; ``dict`` is still accepted but emits a :class:`DeprecationWarning`.
+            instance; ``dict`` is still accepted (deprecated as of 1.2, planned for removal in
+            1.2.5) but logs a deprecation warning.
         segy_header_overrides: Option to override specific SEG-Y headers during ingestion.
     """
     typed_grid_overrides = _coerce_grid_overrides(grid_overrides)
diff --git a/src/mdio/ingestion/segy/index_strategies.py b/src/mdio/ingestion/segy/index_strategies.py
index 8e533f3e..5d97d6e0 100644
--- a/src/mdio/ingestion/segy/index_strategies.py
+++ b/src/mdio/ingestion/segy/index_strategies.py
@@ -4,8 +4,7 @@
 single-responsibility `IndexStrategy` objects that can be composed via `CompositeStrategy`.
 
 Strategies are selected by `IndexStrategyRegistry` from the typed `GridOverrides`
-configuration plus optional template hints. The public contract preserved by `GridOverrider`
-(a thin shim around this module) keeps end-to-end ingestion behavior identical.
+configuration plus optional template hints, preserving end-to-end ingestion behavior.
 """
 
 from __future__ import annotations
@@ -50,7 +49,7 @@ class IndexStrategy(ABC):
     header values; subclasses override only when they need different semantics
     (currently just `CompositeStrategy`).
 
-    Subclasses with header preconditions set `required_keys` so the shim and
+    Subclasses with header preconditions set `required_keys` so the ingestion reader and
     `CompositeStrategy` can raise `GridOverrideKeysError` with a clear
     "missing fields" message before NumPy fails on a deeper key lookup.
     """
@@ -66,7 +65,7 @@ def required_keys(self) -> frozenset[str]:
     def validate_headers(self, headers: HeaderArray) -> None:
         """Raise `GridOverrideKeysError` if any required header field is missing.
 
-        Callers (the `GridOverrider` shim and `CompositeStrategy`) invoke this before each
+        Callers (the ingestion reader and `CompositeStrategy`) invoke this before each
         transform so failure points at the user-facing override name rather than at a NumPy
         structured-array key error.
         """
@@ -155,12 +154,12 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
 class NonBinnedStrategy(DuplicateHandlingStrategy):
     """Collapse selected non-binned dimensions into a single `trace` dimension.
 
-    Inherits the per-tuple `trace` counter from `DuplicateHandlingStrategy` and
-    captures `chunksize` so the `GridOverrider` shim can size the new `trace` chunk correctly.
+    Inherits the per-tuple `trace` counter from `DuplicateHandlingStrategy`, excluding the
+    collapsed dims from the grouping so the counter only varies along the remaining dims.
+    The `trace` chunk size is owned by the schema reshaping (`CollapseToTraceEffect`), not
+    by this strategy.
 
     Args:
-        chunksize: Chunk size to assign to the `trace` dimension. The strategy itself
-            does not apply this value; the shim uses it when rewriting the chunksize tuple.
         non_binned_dims: Header fields collapsed into `trace`. They are excluded from
             the duplicate grouping so the counter only varies along the remaining dims.
         coord_fields: Template coordinate names to exclude from grouping.
@@ -169,19 +168,15 @@ class NonBinnedStrategy(DuplicateHandlingStrategy):
 
     def __init__(
         self,
-        chunksize: int,
         non_binned_dims: Iterable[str],
         coord_fields: Iterable[str] = (),
         dtype: DTypeLike = np.int16,
     ) -> None:
-        non_binned_dims = tuple(non_binned_dims)
         super().__init__(
             coord_fields=coord_fields,
-            excluded_fields=non_binned_dims,
+            excluded_fields=tuple(non_binned_dims),
             dtype=dtype,
         )
-        self.chunksize = chunksize
-        self.non_binned_dims = non_binned_dims
 
 
 class ChannelWrappingStrategy(IndexStrategy):
@@ -421,7 +416,6 @@ def create_strategy(
             if grid_overrides.non_binned:
                 strategies.append(
                     NonBinnedStrategy(
-                        chunksize=grid_overrides.chunksize,
                         non_binned_dims=grid_overrides.non_binned_dims or (),
                         coord_fields=coord_fields,
                     )
diff --git a/src/mdio/ingestion/segy/pipeline.py b/src/mdio/ingestion/segy/pipeline.py
index 0752d360..5fdb60a5 100644
--- a/src/mdio/ingestion/segy/pipeline.py
+++ b/src/mdio/ingestion/segy/pipeline.py
@@ -23,6 +23,7 @@
 from mdio.ingestion.segy.serializer import serialize_to_mdio
 from mdio.ingestion.segy.validation import validate_spec_in_template
 from mdio.segy.file import get_segy_file_info
+from mdio.segy.geometry import validate_overrides_for_template
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -142,6 +143,7 @@ def segy_to_mdio(  # noqa: PLR0913
         GridTraceCountError: If the live trace count in the built grid does not match
             the number of traces reported by the SEG-Y file.
     """
+    validate_overrides_for_template(grid_overrides, mdio_template)
     validate_spec_in_template(segy_spec, mdio_template)
 
     input_path = _normalize_path(input_path)
diff --git a/src/mdio/ingestion/segy/reader.py b/src/mdio/ingestion/segy/reader.py
index 990966c9..9f64d50a 100644
--- a/src/mdio/ingestion/segy/reader.py
+++ b/src/mdio/ingestion/segy/reader.py
@@ -71,6 +71,10 @@ def read_index_headers(  # noqa: PLR0913
     )
     logger.info("Using index strategy: %s", strategy.name)
 
+    # Validate up front so a missing required field surfaces as a clear GridOverrideKeysError
+    # naming the override, rather than a deep NumPy key error inside the transform. Composite
+    # strategies additionally re-validate each child against the running header array.
+    strategy.validate_headers(parsed_headers)
     indexed_headers = strategy.transform_headers(parsed_headers)
 
     # 4. Compute spatial dimensions
diff --git a/src/mdio/segy/geometry.py b/src/mdio/segy/geometry.py
index aa24270a..b8cb88de 100644
--- a/src/mdio/segy/geometry.py
+++ b/src/mdio/segy/geometry.py
@@ -15,6 +15,9 @@
 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
+from pydantic import model_validator
+
+from mdio.segy.exceptions import GridOverrideMissingParameterError
 
 if TYPE_CHECKING:
     from mdio.builder.templates.base import AbstractDatasetTemplate
@@ -63,6 +66,31 @@ class GridOverrides(BaseModel):
         description="Dimension names to collapse into the trace dimension when `non_binned` is True.",
     )
 
+    @model_validator(mode="after")
+    def _check_non_binned_parameters(self) -> GridOverrides:
+        """Require the parameters ``non_binned`` depends on.
+
+        ``chunksize`` and ``non_binned_dims`` are only meaningful when collapsing dims into a
+        ``trace`` axis. Enforcing the dependency on the model means every construction path
+        (typed instance or a coerced legacy dict) fails fast with the same error, so the
+        ingestion pipeline does not need to re-check it.
+
+        Raises:
+            GridOverrideMissingParameterError: When ``non_binned`` is set without both
+                ``chunksize`` and ``non_binned_dims``.
+        """
+        if not self.non_binned:
+            return self
+        missing: set[str] = set()
+        if self.chunksize is None:
+            missing.add("chunksize")
+        if not self.non_binned_dims:
+            missing.add("non_binned_dims")
+        if missing:
+            command = "NonBinned"
+            raise GridOverrideMissingParameterError(command, missing)
+        return self
+
     def __bool__(self) -> bool:
         """Return True if any override flag is enabled."""
         return (
@@ -74,7 +102,7 @@ def __bool__(self) -> bool:
         )
 
     def to_legacy_dict(self) -> dict[str, Any]:
-        """Dump to the legacy ``CamelCase`` dict shape consumed by :class:`GridOverrider`."""
+        """Dump to the legacy ``CamelCase`` dict shape stored in dataset metadata."""
         return self.model_dump(by_alias=True, exclude_defaults=True)
 
 
@@ -95,24 +123,29 @@ def _resolve_synthesize_dims(template: AbstractDatasetTemplate | None) -> tuple[
     return ()
 
 
-def _validate_template_for_overrides(
-    config: GridOverrides,
+def validate_overrides_for_template(
+    config: GridOverrides | None,
     template: AbstractDatasetTemplate | None,
 ) -> None:
     """Reject grid override / template pairings that v1.1 forbade.
 
     ``auto_shot_wrap`` is streamer-only and ``calculate_shot_index`` is OBN-only; using
     either with the wrong template silently produced wrong shot indices in v1.1 unless
-    the per-command validator caught it. This function restores that guard.
+    the per-command validator caught it. This is the one guard the :class:`GridOverrides`
+    model cannot enforce on its own (it depends on the chosen template), so the ingestion
+    pipeline calls it before any header parsing.
 
     Args:
-        config: Typed grid overrides extracted from the user's legacy dict.
+        config: Typed grid overrides, or ``None`` when no overrides were requested.
         template: Template chosen by the caller, or ``None`` if omitted.
 
     Raises:
         TypeError: When ``auto_shot_wrap`` is set without a streamer template, or
             ``calculate_shot_index`` is set without an OBN receiver-gathers template.
     """
+    if not config:
+        return
+
     if config.auto_shot_wrap:
         # Lazy import: see ``_resolve_synthesize_dims`` for the cycle rationale.
         from mdio.builder.templates.seismic_3d_streamer_field import (  # noqa: PLC0415
diff --git a/tests/unit/ingestion/test_segy_grid_overrides.py b/tests/unit/ingestion/test_segy_grid_overrides.py
index 34055fcc..d17a27df 100644
--- a/tests/unit/ingestion/test_segy_grid_overrides.py
+++ b/tests/unit/ingestion/test_segy_grid_overrides.py
@@ -20,7 +20,7 @@
 from mdio.segy.exceptions import GridOverrideUnknownError
 from mdio.segy.geometry import GridOverrides
 from mdio.segy.geometry import _resolve_synthesize_dims
-from mdio.segy.geometry import _validate_template_for_overrides
+from mdio.segy.geometry import validate_overrides_for_template
 
 if TYPE_CHECKING:
     from mdio.builder.templates.base import AbstractDatasetTemplate
@@ -47,19 +47,13 @@ def run_override(
         if key not in valid_keys:
             raise GridOverrideUnknownError(key)
 
+    # NonBinned parameter requirements are enforced by the GridOverrides model itself, so
+    # model_validate raises here for an under-specified NonBinned config (matching production).
     config = GridOverrides.model_validate(grid_overrides)
 
-    if config.non_binned:
-        missing: set[str] = set()
-        if config.chunksize is None:
-            missing.add("chunksize")
-        if not config.non_binned_dims:
-            missing.add("non_binned_dims")
-        if missing:
-            command = "NonBinned"
-            raise GridOverrideMissingParameterError(command, missing)
-
-    _validate_template_for_overrides(config, template)
+    # Template-compatibility is the one guard the model cannot self-enforce; mirror the
+    # production pipeline so this helper cannot drift from what `segy_to_mdio` runs.
+    validate_overrides_for_template(config, template)
 
     synthesize_dims = _resolve_synthesize_dims(template)
     registry = IndexStrategyRegistry()
@@ -184,6 +178,12 @@ def test_non_binned(self, mock_streamer_headers: dict[str, npt.NDArray]) -> None
         expected_trace_coords = np.arange(1, 21, dtype="int32")
         assert_array_equal(dims[2].coords, expected_trace_coords)
 
+    def test_non_binned_missing_parameters_raises(self, mock_streamer_headers: dict[str, npt.NDArray]) -> None:
+        """NonBinned without `chunksize`/`non_binned_dims` raises GridOverrideMissingParameterError."""
+        index_names = ("shot_point", "cable")
+        with pytest.raises(GridOverrideMissingParameterError, match="chunksize"):
+            run_override({"NonBinned": True}, index_names, mock_streamer_headers)
+
 
 class TestStreamerGridOverrides:
     """Check grid overrides for shot data with streamer acquisition."""
diff --git a/tests/unit/ingestion/test_segy_index_strategies.py b/tests/unit/ingestion/test_segy_index_strategies.py
index 70384ed3..14bfbf9f 100644
--- a/tests/unit/ingestion/test_segy_index_strategies.py
+++ b/tests/unit/ingestion/test_segy_index_strategies.py
@@ -29,7 +29,7 @@
 from mdio.segy.exceptions import GridOverrideKeysError
 from mdio.segy.geometry import GridOverrides
 from mdio.segy.geometry import _resolve_synthesize_dims
-from mdio.segy.geometry import _validate_template_for_overrides
+from mdio.segy.geometry import validate_overrides_for_template
 
 
 class GridOverrider:
@@ -44,7 +44,7 @@ def run(
     ) -> np.ndarray:
         """Run mock strategy validation and transform."""
         config = GridOverrides.model_validate(grid_overrides)
-        _validate_template_for_overrides(config, template)
+        validate_overrides_for_template(config, template)
         synthesize_dims = _resolve_synthesize_dims(template)
         registry = IndexStrategyRegistry()
         strategy = registry.create_strategy(
@@ -93,12 +93,11 @@ def test_synthesize_dims_only(self) -> None:
         assert strategy.synthesize_dims == ("component",)
 
     def test_non_binned_only(self) -> None:
-        """``non_binned`` -> NonBinnedStrategy with chunksize and excluded dims wired."""
+        """``non_binned`` -> NonBinnedStrategy with the collapsed dims excluded from grouping."""
         overrides = GridOverrides(non_binned=True, chunksize=64, non_binned_dims=["channel"])
         strategy = IndexStrategyRegistry().create_strategy(grid_overrides=overrides)
         assert isinstance(strategy, NonBinnedStrategy)
-        assert strategy.chunksize == 64
-        assert strategy.non_binned_dims == ("channel",)
+        assert strategy.excluded_fields == frozenset({"channel"})
 
     def test_has_duplicates_only(self) -> None:
         """``has_duplicates`` -> DuplicateHandlingStrategy."""
@@ -259,12 +258,6 @@ def test_excluded_fields_dropped_from_grouping(self) -> None:
 class TestNonBinnedStrategy:
     """``NonBinned`` is the duplicate counter wired with explicit collapse dims."""
 
-    def test_chunksize_and_dims_recorded(self) -> None:
-        """Constructor stores both for the ``GridOverrider`` shim to use."""
-        strategy = NonBinnedStrategy(chunksize=4, non_binned_dims=("channel",))
-        assert strategy.chunksize == 4
-        assert strategy.non_binned_dims == ("channel",)
-
     def test_collapse_dim_excluded_from_counter(self) -> None:
         """The non-binned dim must NOT participate in the duplicate counter grouping."""
         headers = _make_struct(
@@ -274,7 +267,7 @@ def test_collapse_dim_excluded_from_counter(self) -> None:
                 "channel": np.array([10, 11, 10, 11], dtype=np.int32),
             }
         )
-        strategy = NonBinnedStrategy(chunksize=4, non_binned_dims=("channel",))
+        strategy = NonBinnedStrategy(non_binned_dims=("channel",))
         out = strategy.transform_headers(headers)
         # Each (shot_point, cable) tuple appears once -> counters all equal 1.
         assert "trace" in out.dtype.names
@@ -290,7 +283,6 @@ def test_coord_fields_also_excluded(self) -> None:
             }
         )
         strategy = NonBinnedStrategy(
-            chunksize=4,
             non_binned_dims=("channel",),
             coord_fields=("cdp_x",),
         )
@@ -481,7 +473,7 @@ def test_strategies_run_in_order(self) -> None:
         composite = CompositeStrategy(
             [
                 ComponentSynthesisStrategy(("component",)),
-                NonBinnedStrategy(chunksize=4, non_binned_dims=("channel",)),
+                NonBinnedStrategy(non_binned_dims=("channel",)),
             ]
         )
         out = composite.transform_headers(headers)
diff --git a/tests/unit/ingestion/test_segy_pipeline.py b/tests/unit/ingestion/test_segy_pipeline.py
index 296887e7..6b7e0697 100644
--- a/tests/unit/ingestion/test_segy_pipeline.py
+++ b/tests/unit/ingestion/test_segy_pipeline.py
@@ -6,10 +6,13 @@
 
 import pytest
 
+from mdio.builder.template_registry import TemplateRegistry
+from mdio.converters.segy import segy_to_mdio
 from mdio.ingestion.schema import DimensionSpec
 from mdio.ingestion.schema import ResolvedSchema
 from mdio.ingestion.segy import pipeline
 from mdio.ingestion.segy.raw_headers import build_raw_header_variables
+from mdio.segy.exceptions import GridOverrideMissingParameterError
 
 
 def _schema(dimensions: list[DimensionSpec]) -> ResolvedSchema:
@@ -74,6 +77,48 @@ def test_raises_when_calculated_dim_missing(self) -> None:
             pipeline._verify_calculated_dimensions(schema, produced, "Toy")
 
 
+class TestGridOverrideValidationThroughPublicApi:
+    """The public ``segy_to_mdio`` must reject bad override/template pairings.
+
+    These exercise the shipped entry point (not a private helper) so the guards cannot
+    silently drop out of the pipeline again. Validation runs before any file IO, so the
+    dummy paths and ``segy_spec`` are never dereferenced.
+    """
+
+    def test_auto_shot_wrap_with_non_streamer_template_raises(self) -> None:
+        """``AutoShotWrap`` is streamer-only; an OBN template must raise TypeError."""
+        with pytest.raises(TypeError, match="auto_shot_wrap only supports"):
+            segy_to_mdio(
+                segy_spec=None,
+                mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"),
+                input_path="in.segy",
+                output_path="out.mdio",
+                grid_overrides={"AutoShotWrap": True},
+            )
+
+    def test_calculate_shot_index_with_non_obn_template_raises(self) -> None:
+        """``CalculateShotIndex`` is OBN-only; a streamer template must raise TypeError."""
+        with pytest.raises(TypeError, match="calculate_shot_index only supports"):
+            segy_to_mdio(
+                segy_spec=None,
+                mdio_template=TemplateRegistry().get("StreamerFieldRecords3D"),
+                input_path="in.segy",
+                output_path="out.mdio",
+                grid_overrides={"CalculateShotIndex": True},
+            )
+
+    def test_non_binned_without_parameters_raises(self) -> None:
+        """``NonBinned`` without ``chunksize``/``non_binned_dims`` must raise before IO."""
+        with pytest.raises(GridOverrideMissingParameterError, match="chunksize"):
+            segy_to_mdio(
+                segy_spec=None,
+                mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"),
+                input_path="in.segy",
+                output_path="out.mdio",
+                grid_overrides={"NonBinned": True},
+            )
+
+
 class TestBuildRawHeaderVariables:
     """Tests for the isolated experimental raw-headers feature."""
 

From cf741937b94c1c953bf357013df8ae89f83e1079 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Thu, 4 Jun 2026 16:37:10 +0000
Subject: [PATCH 11/11] Further cleanup

---
 src/mdio/ingestion/segy/index_strategies.py   | 61 ++++++++++++++-----
 tests/unit/ingestion/test_metadata.py         |  4 +-
 tests/unit/ingestion/test_schema_effects.py   | 12 +---
 tests/unit/ingestion/test_schema_resolver.py  | 12 ++--
 .../ingestion/test_segy_index_strategies.py   | 20 +++++-
 5 files changed, 75 insertions(+), 34 deletions(-)

diff --git a/src/mdio/ingestion/segy/index_strategies.py b/src/mdio/ingestion/segy/index_strategies.py
index 5d97d6e0..fe895c31 100644
--- a/src/mdio/ingestion/segy/index_strategies.py
+++ b/src/mdio/ingestion/segy/index_strategies.py
@@ -89,6 +89,16 @@ def compute_dimensions(self, headers: HeaderArray, dim_names: tuple[str, ...]) -
             Dimension(coords=np.unique(headers[name]), name=name) for name in dim_names if name in headers.dtype.names
         ]
 
+    def schema_effect(self) -> SchemaEffect | None:
+        """Schema reshape this strategy implies, or ``None`` if it leaves the layout unchanged.
+
+        Most strategies only transform headers. Only strategies that introduce a ``trace``
+        dimension the template did not declare (see :class:`DuplicateHandlingStrategy` and
+        :class:`NonBinnedStrategy`) reshape the resolved schema. Co-locating the reshape with
+        the header transform keeps the two views of an override from drifting.
+        """
+        return None
+
     @property
     def name(self) -> str:
         """Return the strategy's class name; useful for logging and tests."""
@@ -150,16 +160,22 @@ def transform_headers(self, headers: HeaderArray) -> HeaderArray:
         trace_values = np.array(with_trace["trace"])
         return rfn.append_fields(headers, "trace", trace_values, usemask=False)
 
+    def schema_effect(self) -> SchemaEffect:
+        """Insert a chunksize-1 ``trace`` dimension to disambiguate duplicate index tuples."""
+        return InsertTraceDimEffect(chunksize=1)
+
 
 class NonBinnedStrategy(DuplicateHandlingStrategy):
     """Collapse selected non-binned dimensions into a single `trace` dimension.
 
     Inherits the per-tuple `trace` counter from `DuplicateHandlingStrategy`, excluding the
-    collapsed dims from the grouping so the counter only varies along the remaining dims.
-    The `trace` chunk size is owned by the schema reshaping (`CollapseToTraceEffect`), not
-    by this strategy.
+    collapsed dims from the grouping so the counter only varies along the remaining dims, and
+    owns the matching schema reshape (`CollapseToTraceEffect`). Both views of the override --
+    the header transform and the schema layout, including the `trace` chunk size -- are
+    therefore defined together here.
 
     Args:
+        chunksize: Chunk size assigned to the inserted `trace` dimension by the schema effect.
         non_binned_dims: Header fields collapsed into `trace`. They are excluded from
             the duplicate grouping so the counter only varies along the remaining dims.
         coord_fields: Template coordinate names to exclude from grouping.
@@ -168,15 +184,23 @@ class NonBinnedStrategy(DuplicateHandlingStrategy):
 
     def __init__(
         self,
+        chunksize: int,
         non_binned_dims: Iterable[str],
         coord_fields: Iterable[str] = (),
         dtype: DTypeLike = np.int16,
     ) -> None:
+        collapse_dims = tuple(non_binned_dims)
         super().__init__(
             coord_fields=coord_fields,
-            excluded_fields=tuple(non_binned_dims),
+            excluded_fields=collapse_dims,
             dtype=dtype,
         )
+        self._chunksize = chunksize
+        self._collapse_dims = collapse_dims
+
+    def schema_effect(self) -> SchemaEffect:
+        """Collapse the non-binned dims into a ``trace`` dimension sized by ``chunksize``."""
+        return CollapseToTraceEffect(chunksize=self._chunksize, collapse_dims=self._collapse_dims)
 
 
 class ChannelWrappingStrategy(IndexStrategy):
@@ -335,21 +359,30 @@ def compute_dimensions(self, headers: HeaderArray, dim_names: tuple[str, ...]) -
         """Delegate to the final child strategy."""
         return self.strategies[-1].compute_dimensions(headers, dim_names)
 
+    def schema_effect(self) -> SchemaEffect | None:
+        """Return the single child reshape; at most one composed strategy produces one."""
+        for strategy in self.strategies:
+            effect = strategy.schema_effect()
+            if effect is not None:
+                return effect
+        return None
+
 
 class IndexStrategyRegistry:
     """Picks the right `IndexStrategy` from grid overrides and template hints.
 
-    The registry is the single source of truth for override semantics: it maps a
-    `GridOverrides` both to the header-transforming `IndexStrategy` (`create_strategy`)
-    and to the schema-reshaping `SchemaEffect` (`schema_effect`). Keeping both derivations
-    here prevents the header view and the schema view from drifting.
+    The registry maps a `GridOverrides` to the header-transforming `IndexStrategy` in one
+    place (`create_strategy`). The schema-reshaping `SchemaEffect` is not selected by a
+    second switch: it is read off that same strategy (`schema_effect`), so the header view
+    and the schema view of an override cannot drift.
     """
 
     def schema_effect(self, grid_overrides: GridOverrides | None) -> SchemaEffect | None:
         """Return the schema reshaping implied by `grid_overrides`, if any.
 
-        Only `non_binned` and `has_duplicates` change the dimension/coordinate layout;
-        every other override transforms headers in place without reshaping the schema.
+        Derived from the same strategy that will transform headers, so layout changes stay in
+        lock-step with the header transform. Template and synthesis hints do not affect the
+        reshape, so they are omitted when building the strategy here.
 
         Args:
             grid_overrides: Typed grid override configuration, or `None`.
@@ -359,12 +392,7 @@ def schema_effect(self, grid_overrides: GridOverrides | None) -> SchemaEffect |
         """
         if not grid_overrides:
             return None
-        if grid_overrides.non_binned:
-            collapse_dims = None if grid_overrides.non_binned_dims is None else tuple(grid_overrides.non_binned_dims)
-            return CollapseToTraceEffect(chunksize=grid_overrides.chunksize, collapse_dims=collapse_dims)
-        if grid_overrides.has_duplicates:
-            return InsertTraceDimEffect(chunksize=1)
-        return None
+        return self.create_strategy(grid_overrides).schema_effect()
 
     def create_strategy(
         self,
@@ -416,6 +444,7 @@ def create_strategy(
             if grid_overrides.non_binned:
                 strategies.append(
                     NonBinnedStrategy(
+                        chunksize=grid_overrides.chunksize,
                         non_binned_dims=grid_overrides.non_binned_dims or (),
                         coord_fields=coord_fields,
                     )
diff --git a/tests/unit/ingestion/test_metadata.py b/tests/unit/ingestion/test_metadata.py
index 3ef7a928..142579c2 100644
--- a/tests/unit/ingestion/test_metadata.py
+++ b/tests/unit/ingestion/test_metadata.py
@@ -34,11 +34,11 @@ def test_adds_grid_overrides_when_provided(self) -> None:
     def test_preserves_existing_attributes(self) -> None:
         """Existing attribute keys should be preserved when adding overrides."""
         dataset = _make_dataset(attributes={"existing": "value"})
-        overrides = GridOverrides(non_binned=True)
+        overrides = GridOverrides(has_duplicates=True)
         add_grid_override_to_metadata(dataset, grid_overrides=overrides)
         assert dataset.metadata.attributes == {
             "existing": "value",
-            "gridOverrides": {"NonBinned": True},
+            "gridOverrides": {"HasDuplicates": True},
         }
 
     def test_no_overrides_leaves_attributes_untouched(self) -> None:
diff --git a/tests/unit/ingestion/test_schema_effects.py b/tests/unit/ingestion/test_schema_effects.py
index 5bd4a224..cd96fdd2 100644
--- a/tests/unit/ingestion/test_schema_effects.py
+++ b/tests/unit/ingestion/test_schema_effects.py
@@ -47,18 +47,12 @@ def test_header_only_overrides_return_none(self) -> None:
         registry = IndexStrategyRegistry()
         assert registry.schema_effect(GridOverrides(auto_channel_wrap=True)) is None
 
-    def test_non_binned_default_collapse(self) -> None:
-        """NonBinned without explicit dims yields a default (None) CollapseToTraceEffect."""
-        effect = IndexStrategyRegistry().schema_effect(GridOverrides(non_binned=True, chunksize=64))
-        assert isinstance(effect, CollapseToTraceEffect)
-        assert effect.chunksize == 64
-        assert effect.collapse_dims is None
-
-    def test_non_binned_explicit_dims(self) -> None:
-        """NonBinned with explicit dims preserves them on the effect."""
+    def test_non_binned_wires_chunksize_and_dims(self) -> None:
+        """NonBinned yields a CollapseToTraceEffect carrying the override's chunksize and dims."""
         overrides = GridOverrides(non_binned=True, chunksize=128, non_binned_dims=["channel"])
         effect = IndexStrategyRegistry().schema_effect(overrides)
         assert isinstance(effect, CollapseToTraceEffect)
+        assert effect.chunksize == 128
         assert effect.collapse_dims == ("channel",)
 
     def test_has_duplicates_inserts_trace(self) -> None:
diff --git a/tests/unit/ingestion/test_schema_resolver.py b/tests/unit/ingestion/test_schema_resolver.py
index ba748115..07444674 100644
--- a/tests/unit/ingestion/test_schema_resolver.py
+++ b/tests/unit/ingestion/test_schema_resolver.py
@@ -49,11 +49,12 @@ def test_cdp_required_fields(self) -> None:
 class TestSchemaResolverNonBinned:
     """NonBinned overrides collapse spatial dimensions into a single ``trace`` axis."""
 
-    def test_default_collapse_keeps_first_spatial_dim(self) -> None:
-        """Default NonBinned keeps the first spatial dim and collapses the rest into ``trace``."""
+    def test_collapses_named_dims_keeping_first_spatial_dim(self) -> None:
+        """NonBinned collapses the named trailing dims into ``trace``, keeping the leading dim."""
         template = Seismic3DStreamerShotGathersTemplate(data_domain="time")
         # Streamer shot template default chunk shape is (8, 1, 128, 2048).
-        schema = SchemaResolver().resolve(template, GridOverrides(non_binned=True, chunksize=64))
+        overrides = GridOverrides(non_binned=True, chunksize=64, non_binned_dims=["cable", "channel"])
+        schema = SchemaResolver().resolve(template, overrides)
 
         names = [d.name for d in schema.dimensions]
         assert names == ["shot_point", "trace", "time"]
@@ -74,7 +75,8 @@ def test_explicit_non_binned_dims(self) -> None:
     def test_coordinate_dimensions_collapsed_when_referenced(self) -> None:
         """Coordinates referencing collapsed dims are rewritten to depend on ``trace``."""
         template = Seismic3DStreamerShotGathersTemplate(data_domain="time")
-        schema = SchemaResolver().resolve(template, GridOverrides(non_binned=True, chunksize=64))
+        overrides = GridOverrides(non_binned=True, chunksize=64, non_binned_dims=["cable", "channel"])
+        schema = SchemaResolver().resolve(template, overrides)
         # group_coord_x originally depends on (shot_point, cable, channel). After NonBinned
         # collapses cable+channel, it should depend on (shot_point, trace).
         group_coord_x = next(c for c in schema.coordinates if c.name == "group_coord_x")
@@ -83,7 +85,7 @@ def test_coordinate_dimensions_collapsed_when_referenced(self) -> None:
     def test_grid_override_provenance_not_in_schema_metadata(self) -> None:
         """The resolver is mechanics-only: override provenance is attached at the dataset level."""
         template = Seismic3DStreamerShotGathersTemplate(data_domain="time")
-        overrides = GridOverrides(non_binned=True, chunksize=64)
+        overrides = GridOverrides(non_binned=True, chunksize=64, non_binned_dims=["channel"])
         schema = SchemaResolver().resolve(template, overrides)
         assert "gridOverrides" not in schema.metadata
 
diff --git a/tests/unit/ingestion/test_segy_index_strategies.py b/tests/unit/ingestion/test_segy_index_strategies.py
index 14bfbf9f..bea178fe 100644
--- a/tests/unit/ingestion/test_segy_index_strategies.py
+++ b/tests/unit/ingestion/test_segy_index_strategies.py
@@ -15,6 +15,8 @@
 import pytest
 
 from mdio.builder.template_registry import TemplateRegistry
+from mdio.ingestion.schema import CollapseToTraceEffect
+from mdio.ingestion.schema import InsertTraceDimEffect
 from mdio.ingestion.segy.index_strategies import ChannelWrappingStrategy
 
 if TYPE_CHECKING:
@@ -249,6 +251,12 @@ def test_excluded_fields_dropped_from_grouping(self) -> None:
         # shot_point alone so each row in the same shot_point gets a fresh counter.
         np.testing.assert_array_equal(out["trace"], [1, 2, 3])
 
+    def test_owns_insert_trace_dim_effect(self) -> None:
+        """The strategy owns its schema reshape: a chunksize-1 inserted ``trace`` dim."""
+        effect = DuplicateHandlingStrategy().schema_effect()
+        assert isinstance(effect, InsertTraceDimEffect)
+        assert effect.chunksize == 1
+
 
 # ---------------------------------------------------------------------------
 # NonBinnedStrategy
@@ -267,7 +275,7 @@ def test_collapse_dim_excluded_from_counter(self) -> None:
                 "channel": np.array([10, 11, 10, 11], dtype=np.int32),
             }
         )
-        strategy = NonBinnedStrategy(non_binned_dims=("channel",))
+        strategy = NonBinnedStrategy(chunksize=4, non_binned_dims=("channel",))
         out = strategy.transform_headers(headers)
         # Each (shot_point, cable) tuple appears once -> counters all equal 1.
         assert "trace" in out.dtype.names
@@ -283,6 +291,7 @@ def test_coord_fields_also_excluded(self) -> None:
             }
         )
         strategy = NonBinnedStrategy(
+            chunksize=4,
             non_binned_dims=("channel",),
             coord_fields=("cdp_x",),
         )
@@ -290,6 +299,13 @@ def test_coord_fields_also_excluded(self) -> None:
         # Grouping is by shot_point only -> each shot_point has 2 rows -> counters {1, 2}.
         np.testing.assert_array_equal(out["trace"], [1, 2, 1, 2])
 
+    def test_owns_collapse_to_trace_effect(self) -> None:
+        """The strategy owns its schema reshape, carrying chunksize and collapse dims."""
+        effect = NonBinnedStrategy(chunksize=8, non_binned_dims=("channel",)).schema_effect()
+        assert isinstance(effect, CollapseToTraceEffect)
+        assert effect.chunksize == 8
+        assert effect.collapse_dims == ("channel",)
+
 
 # ---------------------------------------------------------------------------
 # ChannelWrappingStrategy
@@ -473,7 +489,7 @@ def test_strategies_run_in_order(self) -> None:
         composite = CompositeStrategy(
             [
                 ComponentSynthesisStrategy(("component",)),
-                NonBinnedStrategy(non_binned_dims=("channel",)),
+                NonBinnedStrategy(chunksize=4, non_binned_dims=("channel",)),
             ]
         )
         out = composite.transform_headers(headers)