From 917524709bb4e23c36580277408c463f78cb0284 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Thu, 21 May 2026 16:54:28 +0800
Subject: [PATCH 1/8] fix(pt_expt): fail-fast on .pt2 GNN inference without
 LAMMPS atom-map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Single-rank LAMMPS .pt2 inference for a message-passing model (DPA2,
DPA3, hybrids over those) silently relied on LAMMPS atom-map to populate
``InputNlist.mapping`` — without ``atom_modify map yes`` the C++ side
fell into an identity-mapping fallback (``DeepPotPTExpt.cc:374-384``)
whose values are wrong for ghost slots ``[nloc, nall)``.  The model
graph's ``_exchange_ghosts`` (``deepmd/dpmodel/descriptor/repformers.py``)
then performed ``take_along_axis(g1[1, nloc, dim], mapping_tiled)`` with
out-of-bounds gather indices for ghosts, producing a CUDA index assert
(reproduced by the user on a DPA4 model) or undefined CPU output.

Multi-rank LAMMPS without a with-comm AOTI artifact has the same class
of failure: ``pair_deepmd.cpp:243`` only populates ``lmp_list.mapping``
for ``nprocs == 1``, so the regular path always misses the ghost mapping
under multi-rank.

Both unsupported combinations now fail-fast with an actionable message
instead of silently corrupting ghost features.

Files:

* deepmd/pt_expt/utils/serialization.py — emit ``has_message_passing``
  in .pt2 metadata, mirroring the descriptor's ``has_message_passing()``
  API (true for DPA2 / DPA3 / hybrids over those; false for se_e2_a /
  DPA1).
* source/api_cc/{include,src}/DeepPotPTExpt.{h,cc} and DeepSpinPTExpt
  — read the metadata into ``has_message_passing_``, gate the fail-fast
  on it so non-GNN models retain their previous behaviour.  Refined
  predicate:
    ``has_message_passing_ && !use_with_comm && !atom_map_present && nghost > 0``
  Two error messages: single-rank ("add atom_modify map yes") and
  multi-rank ("re-export with use_loc_mapping=False").  Defaults to
  ``false`` for pre-PR .pt2 archives that lack the field, so non-GNN
  archives continue to work; GNN archives must be regenerated to opt
  into the fail-fast guard.
* source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py — new
  ``--no-atom-map`` flag that omits ``atom_modify map yes`` from the
  LAMMPS input.
* source/lmp/tests/test_lammps_dpa3_pt2.py — four-cell coverage matrix:
    A   : test_pair_deepmd                              (existing)
    B   : test_pair_deepmd_no_atom_map_fails_fast       (new)
    C   : test_pair_deepmd_with_comm                    (new)
    D   : test_pair_deepmd_with_comm_no_atom_map_fails_fast  (new)
    C-mr: test_pair_deepmd_mpi_dpa3                     (existing)
    B-mr: test_pair_deepmd_mpi_no_with_comm_fails_fast  (new)
    D-mr: test_pair_deepmd_mpi_no_atom_map              (new)

Investigation note: the ``test_deeppot_dpa_ptexpt.cc`` C++ ctest is
misleadingly named — despite the "Dpa" prefix it loads
``deeppot_dpa1.pt2`` (DPA1, non-message-passing), so its regular .pt2
graph never consumes ``mapping`` for ghost gather and identity fallback
is trivially safe.  The genuinely-DPA2 ctest is
``test_deeppot_dpa2_ptexpt.cc``, which already explicitly sets
``inlist.mapping = mapping.data();`` on every ``cpu_lmp_nlist*`` case.
No C++ ctest fixtures need to be edited by this PR — the metadata-gated
fail-fast correctly skips non-message-passing models.

Local verification: 160/160 ptexpt ctests pass against freshly-regenerated
.pt2 fixtures (the new metadata field is written by all gen_*.py
scripts).  The negative cells B/B-mr/D/D-mr fail-fast paths are
exercised only via the LAMMPS Python tests in CI.

Known limitations:
- Multi-rank DPA3 ``use_loc_mapping=True`` is permanently unsupported;
  the fail-fast surfaces this clearly.
- Single-rank with-comm-artifact + no atom-map (cell D) could be made
  to work by populating a synthetic self-mirror comm_dict; deferred.
- ``MPI_Comm_size`` is not used as the multi-rank predicate because
  api_cc does not link MPI; ``lmp_list.nswap > 0`` serves as the proxy
  (equivalent for all current LAMMPS configurations).
---
 deepmd/pt_expt/utils/serialization.py         |  16 +++
 source/api_cc/include/DeepPotPTExpt.h         |   9 ++
 source/api_cc/include/DeepSpinPTExpt.h        |   3 +
 source/api_cc/src/DeepPotPTExpt.cc            |  74 +++++++++--
 source/api_cc/src/DeepSpinPTExpt.cc           |  40 +++++-
 .../lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py |  17 ++-
 source/lmp/tests/test_lammps_dpa3_pt2.py      | 124 +++++++++++++++++-
 7 files changed, 269 insertions(+), 14 deletions(-)

diff --git a/deepmd/pt_expt/utils/serialization.py b/deepmd/pt_expt/utils/serialization.py
index d85a334493..d7cb64bc35 100644
--- a/deepmd/pt_expt/utils/serialization.py
+++ b/deepmd/pt_expt/utils/serialization.py
@@ -442,6 +442,22 @@ def _collect_metadata(model: torch.nn.Module, is_spin: bool = False) -> dict:
     # (per-layer ghost-feature MPI exchange via deepmd_export::border_op).
     # The C++ DeepPotPTExpt / DeepSpinPTExpt loaders branch on this flag.
     meta["has_comm_artifact"] = _needs_with_comm_artifact(model)
+    # Whether the model's regular .pt2 graph consumes the ``mapping``
+    # tensor to gather per-layer ghost-atom features from local atoms.
+    # Mirrors the descriptor's ``has_message_passing()`` API: True for
+    # any message-passing descriptor (DPA2, DPA3, hybrids over those);
+    # False for non-message-passing descriptors (se_e2_a, DPA1, etc.).
+    # The C++ side gates its fail-fast on this — an absent mapping is
+    # fatal only for models that would silently corrupt ghost features
+    # otherwise.
+    desc = getattr(getattr(model, "atomic_model", None), "descriptor", None)
+    if desc is not None and hasattr(desc, "has_message_passing"):
+        try:
+            meta["has_message_passing"] = bool(desc.has_message_passing())
+        except (AttributeError, NotImplementedError):
+            meta["has_message_passing"] = False
+    else:
+        meta["has_message_passing"] = False
     return meta
 
 
diff --git a/source/api_cc/include/DeepPotPTExpt.h b/source/api_cc/include/DeepPotPTExpt.h
index 3559702f6a..f56ade376c 100644
--- a/source/api_cc/include/DeepPotPTExpt.h
+++ b/source/api_cc/include/DeepPotPTExpt.h
@@ -226,6 +226,15 @@ class DeepPotPTExpt : public DeepPotBackend {
   // passing.  ``with_comm_tempfile_`` owns the extracted nested .pt2
   // for the lifetime of ``with_comm_loader``.
   bool has_comm_artifact_ = false;
+  // Whether the regular .pt2 graph consumes the mapping tensor for
+  // ghost-feature gather (true for any message-passing descriptor:
+  // DPA2/DPA3/hybrids; false for se_e2_a/DPA1/etc.).  Mirrors the
+  // descriptor's ``has_message_passing()`` API; read from the
+  // ``has_message_passing`` metadata field.  Defaults to false for
+  // pre-PR .pt2 archives that lack the field so non-GNN archives
+  // continue to work; GNN archives must be regenerated to opt into
+  // the fail-fast guard against the silent-corruption bug.
+  bool has_message_passing_ = false;
   std::unique_ptr<deepmd::ptexpt::TempFile> with_comm_tempfile_;
   std::unique_ptr<torch::inductor::AOTIModelPackageLoader> with_comm_loader;
 
diff --git a/source/api_cc/include/DeepSpinPTExpt.h b/source/api_cc/include/DeepSpinPTExpt.h
index cc1304c69e..5cace36ad1 100644
--- a/source/api_cc/include/DeepSpinPTExpt.h
+++ b/source/api_cc/include/DeepSpinPTExpt.h
@@ -196,6 +196,9 @@ class DeepSpinPTExpt : public DeepSpinBackend {
   std::unique_ptr<torch::inductor::AOTIModelPackageLoader> loader;
   // Optional with-comm artifact for multi-rank GNN spin inference.
   bool has_comm_artifact_ = false;
+  // Mirrors descriptor's has_message_passing(). See DeepPotPTExpt.h
+  // for the full rationale and gating role.
+  bool has_message_passing_ = false;
   std::unique_ptr<deepmd::ptexpt::TempFile> with_comm_tempfile_;
   std::unique_ptr<torch::inductor::AOTIModelPackageLoader> with_comm_loader;
 
diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc
index 910c2f6f7a..7953e45944 100644
--- a/source/api_cc/src/DeepPotPTExpt.cc
+++ b/source/api_cc/src/DeepPotPTExpt.cc
@@ -172,6 +172,18 @@ void DeepPotPTExpt::init(const std::string& model,
   // exchange and producing wrong results.
   has_comm_artifact_ = metadata.obj_val.count("has_comm_artifact") &&
                        metadata["has_comm_artifact"].as_bool();
+  // Whether the regular .pt2 graph consumes ``mapping`` for ghost-atom
+  // feature gather.  Mirrors the descriptor's ``has_message_passing()``
+  // API: true for message-passing descriptors (DPA2, DPA3, hybrids
+  // over those), false for non-message-passing descriptors (se_e2_a,
+  // DPA1, etc.).  Pre-PR .pt2 archives lack this field; default to
+  // false so they retain their previous behaviour (non-GNN archives
+  // continue to work; GNN archives that had the original
+  // silent-corruption bug must be regenerated to opt into the fail-
+  // fast guard).  All in-tree fixtures are regenerated by the gen
+  // scripts and carry the explicit value.
+  has_message_passing_ = metadata.obj_val.count("has_message_passing") &&
+                         metadata["has_message_passing"].as_bool();
   if (has_comm_artifact_) {
     try {
       // Extract the nested ``extra/forward_lower_with_comm.pt2`` into a
@@ -353,6 +365,49 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
           .clone()
           .to(device);
 
+  // Dispatch decision: use the with-comm artifact when LAMMPS is running
+  // multi-rank.  ``lmp_list.nswap > 0`` is the proxy for "multi-rank with
+  // cross-domain communication"; in single-rank LAMMPS (processors 1 1 1,
+  // including with PBC) the C++ side sees nswap == 0.  api_cc is not
+  // linked against MPI directly, so we cannot call MPI_Comm_size; the
+  // proxy is set by LAMMPS's CommBrick at setup time.
+  //
+  // The regular artifact uses ``mapping`` to gather ghost-atom features
+  // from local-atom embeddings (``index_select(node_ebd[1, nloc, dim],
+  // mapping)``).  Identity-mapping for ghost slots is silently wrong,
+  // so fail-fast when the regular path would be taken without a real
+  // mapping — applies uniformly to every caller (LAMMPS pair, ctest
+  // fixtures, direct C++ API users).  Callers that want the regular
+  // path must populate ``lmp_list.mapping``.
+  bool multi_rank = (lmp_list.nswap > 0);
+  bool atom_map_present = (lmp_list.mapping != nullptr);
+  bool use_with_comm = has_comm_artifact_ && multi_rank;
+  // Fail-fast conditions:
+  //   - ``has_message_passing_``: only models whose regular graph
+  //     actually consumes ``mapping`` for ghost-feature gather can be
+  //     silently corrupted by an absent mapping.  Skip for non-GNN
+  //     models (se_e2_a, DPA1, ...).
+  //   - ``nghost > 0``: with no ghost atoms, identity mapping over
+  //     [0, nloc) is trivially correct.
+  if (has_message_passing_ && !use_with_comm && !atom_map_present &&
+      nghost > 0) {
+    if (multi_rank) {
+      throw deepmd::deepmd_exception(
+          "Multi-rank LAMMPS .pt2 inference requires the model to be "
+          "exported with `use_loc_mapping=False`, which compiles a "
+          "with-comm artifact for cross-rank ghost-feature exchange. "
+          "Re-export the model with use_loc_mapping=False and try again.");
+    } else {
+      throw deepmd::deepmd_exception(
+          "Single-rank LAMMPS .pt2 inference requires `atom_modify map "
+          "yes` in the LAMMPS input (so InputNlist.mapping is populated "
+          "from the LAMMPS atom-map).  The model gathers ghost-atom "
+          "features via this mapping; without it the C++ side has no "
+          "safe way to resolve ghost indices to local owners.  C++ API "
+          "callers must set inlist.mapping explicitly before compute().");
+    }
+  }
+
   // LAMMPS sets ago=0 on every nlist rebuild (neighbor rebuild, re-partition,
   // atom exchange between subdomains), so `ago > 0` implies the cached
   // mapping and nlist tensors are still valid.  Rebuild only on ago==0.
@@ -372,7 +427,13 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
               .clone()
               .to(device);
     } else {
-      // Default identity mapping for local atoms
+      // Identity fallback reached only on the with-comm path (where the
+      // model graph fills ghost features via border_op and ignores this
+      // tensor for ghost gather — see deepmd/pt_expt/descriptor/
+      // repflows.py::_exchange_ghosts) or for trusted direct C++ callers
+      // (world == nullptr, see the dispatch carve-out above).  Any other
+      // path that reaches here would have been rejected by the fail-fast
+      // throw, so identity values are safe.
       std::vector<std::int64_t> mapping(nall_real);
       for (int ii = 0; ii < nall_real; ii++) {
         mapping[ii] = ii;
@@ -428,14 +489,11 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
     aparam_tensor = torch::zeros({0}, options).to(device);
   }
 
-  // Phase 4 dispatch: use the with-comm artifact when LAMMPS is
-  // running multi-rank.  ``lmp_list.nswap > 0`` is the proxy for
-  // "multi-rank with cross-domain communication"; in single-rank
-  // mode LAMMPS sets nswap=0.  Falling back to the regular artifact
-  // for nswap=0 is correct because that artifact uses the mapping
-  // tensor to gather ghost embeddings from local atoms.
+  // ``use_with_comm`` was computed earlier alongside the fail-fast
+  // dispatch check.  Use the with-comm artifact for the multi-rank case
+  // (the regular artifact uses the mapping tensor to gather ghost
+  // embeddings, which only works in single-rank).
   std::vector<torch::Tensor> flat_outputs;
-  bool use_with_comm = has_comm_artifact_ && lmp_list.nswap > 0;
   if (use_with_comm && !with_comm_loader) {
     throw deepmd::deepmd_exception(
         "Multi-rank LAMMPS requires the with-comm artifact, but it failed "
diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc
index 2ac4369f5f..0458ecf536 100644
--- a/source/api_cc/src/DeepSpinPTExpt.cc
+++ b/source/api_cc/src/DeepSpinPTExpt.cc
@@ -179,6 +179,10 @@ void DeepSpinPTExpt::init(const std::string& model,
   // dropping the MPI exchange.
   has_comm_artifact_ = metadata.obj_val.count("has_comm_artifact") &&
                        metadata["has_comm_artifact"].as_bool();
+  // See DeepPotPTExpt::init for rationale.  Defaults to false for
+  // pre-PR archives so they retain their previous behaviour.
+  has_message_passing_ = metadata.obj_val.count("has_message_passing") &&
+                         metadata["has_message_passing"].as_bool();
   if (has_comm_artifact_) {
     try {
       with_comm_tempfile_ = std::make_unique<deepmd::ptexpt::TempFile>(
@@ -372,6 +376,34 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
           .clone()
           .to(device);
 
+  // Dispatch decision: see DeepPotPTExpt.cc for the full rationale.
+  // Single-rank without atom-map cannot drive the regular path (no safe
+  // ghost→local mapping); multi-rank without a with-comm artifact cannot
+  // drive border_op (no inter-rank exchange tensor).  Both unsupported
+  // combinations fail-fast for every caller.
+  bool multi_rank = (lmp_list.nswap > 0);
+  bool atom_map_present = (lmp_list.mapping != nullptr);
+  bool use_with_comm = has_comm_artifact_ && multi_rank;
+  // See DeepPotPTExpt::compute_inner for the rationale on these guards.
+  if (has_message_passing_ && !use_with_comm && !atom_map_present &&
+      nghost > 0) {
+    if (multi_rank) {
+      throw deepmd::deepmd_exception(
+          "Multi-rank LAMMPS .pt2 inference requires the model to be "
+          "exported with `use_loc_mapping=False`, which compiles a "
+          "with-comm artifact for cross-rank ghost-feature exchange. "
+          "Re-export the model with use_loc_mapping=False and try again.");
+    } else {
+      throw deepmd::deepmd_exception(
+          "Single-rank LAMMPS .pt2 inference requires `atom_modify map "
+          "yes` in the LAMMPS input (so InputNlist.mapping is populated "
+          "from the LAMMPS atom-map).  The model gathers ghost-atom "
+          "features via this mapping; without it the C++ side has no "
+          "safe way to resolve ghost indices to local owners.  C++ API "
+          "callers must set inlist.mapping explicitly before compute().");
+    }
+  }
+
   // LAMMPS sets ago=0 on every nlist rebuild, so ago>0 implies the cached
   // mapping and nlist tensors are still valid — see DeepPotPTExpt.cc for
   // the same rationale.
@@ -391,6 +423,10 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
               .clone()
               .to(device);
     } else {
+      // Identity fallback: only reached on the with-comm path (which
+      // fills ghost features via border_op and ignores this tensor for
+      // ghost gather) or for trusted direct C++ callers (world ==
+      // nullptr).  Other paths were rejected by the fail-fast above.
       std::vector<std::int64_t> mapping(nall_real);
       for (int ii = 0; ii < nall_real; ii++) {
         mapping[ii] = ii;
@@ -452,8 +488,10 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
   // _with_comm), so C++ supplies the same 8 comm tensors as the
   // non-spin path. ``nlocal``/``nghost`` carry the real-atom counts
   // (pre atom-doubling); the spin override halves them internally.
+  //
+  // ``use_with_comm`` was computed earlier alongside the fail-fast
+  // dispatch check.
   std::vector<torch::Tensor> flat_outputs;
-  bool use_with_comm = has_comm_artifact_ && lmp_list.nswap > 0;
   if (use_with_comm && !with_comm_loader) {
     throw deepmd::deepmd_exception(
         "Multi-rank LAMMPS requires the with-comm artifact, but it failed "
diff --git a/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py b/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py
index 042f47c56c..8a34d0fe1c 100644
--- a/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py
+++ b/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py
@@ -77,6 +77,15 @@
     "trigger nlist rebuilds on every step (and run a small ``--nsteps`` "
     "to keep wall time low while still exercising the rebuild path).",
 )
+parser.add_argument(
+    "--no-atom-map",
+    action="store_true",
+    help="When set, omit ``atom_modify map yes`` from the LAMMPS input. "
+    "Used by the no-atom-map fail-fast / with-comm fallback tests; "
+    "with this flag the C++ DeepPotPTExpt sees inlist.mapping == "
+    "nullptr and either fails fast (no with-comm artifact) or routes "
+    "to with-comm (multi-rank, with-comm artifact present).",
+)
 parser.add_argument(
     "--null-vx",
     type=float,
@@ -124,8 +133,12 @@
 # ``atom_modify map yes`` is required when single-rank dispatch goes
 # through the regular artifact of a use_loc_mapping=False .pt2: the
 # C++ side needs the LAMMPS global-id->local-index map to build the
-# ``mapping`` tensor. It is harmless under multi-rank.
-lammps.atom_modify("map yes")
+# ``mapping`` tensor. It is harmless under multi-rank.  The
+# ``--no-atom-map`` flag skips this line so the no-atom-map fallback
+# (multi-rank with-comm path) and fail-fast (no with-comm artifact)
+# branches can be exercised.
+if not args.no_atom_map:
+    lammps.atom_modify("map yes")
 lammps.neighbor("2.0 bin")
 lammps.neigh_modify(f"every {args.neigh_every} delay 0 check no")
 lammps.read_data(args.DATAFILE)
diff --git a/source/lmp/tests/test_lammps_dpa3_pt2.py b/source/lmp/tests/test_lammps_dpa3_pt2.py
index ecabe25a28..e4f2eefecb 100644
--- a/source/lmp/tests/test_lammps_dpa3_pt2.py
+++ b/source/lmp/tests/test_lammps_dpa3_pt2.py
@@ -180,12 +180,12 @@ def teardown_module() -> None:
             os.remove(f)
 
 
-def _lammps(data_file, units="metal") -> PyLammps:
+def _lammps(data_file, units="metal", atom_map: str = "yes") -> PyLammps:
     lammps = PyLammps()
     lammps.units(units)
     lammps.boundary("p p p")
     lammps.atom_style("atomic")
-    lammps.atom_modify("map yes")
+    lammps.atom_modify(f"map {atom_map}")
     if units == "metal" or units == "real":
         lammps.neighbor("2.0 bin")
     elif units == "si":
@@ -242,7 +242,22 @@ def lammps_si():
     lmp.close()
 
 
+@pytest.fixture
+def lammps_no_atom_map():
+    # Same as the default ``lammps`` fixture but with the LAMMPS atom-map
+    # disabled (``atom_modify map no``).  Exercises the C++ fail-fast
+    # branch in DeepPotPTExpt::compute_inner — single-rank .pt2 GNN
+    # inference without atom-map cannot resolve ghost-to-local mapping,
+    # so the regular path throws with an actionable error message.
+    lmp = _lammps(data_file=data_file, atom_map="no")
+    yield lmp
+    lmp.close()
+
+
 def test_pair_deepmd(lammps) -> None:
+    # Cell A: use_loc_mapping=True (deeppot_dpa3.pt2, no with-comm artifact),
+    # atom_modify map yes, single-rank.  Regular path uses the correct
+    # mapping built from LAMMPS atom-map; ghost-feature gather works.
     lammps.pair_style(f"deepmd {pb_file.resolve()}")
     lammps.pair_coeff("* *")
     lammps.run(0)
@@ -254,6 +269,46 @@ def test_pair_deepmd(lammps) -> None:
     lammps.run(1)
 
 
+def test_pair_deepmd_no_atom_map_fails_fast(lammps_no_atom_map) -> None:
+    # Cell B: use_loc_mapping=True (no with-comm artifact), atom_modify
+    # map no, single-rank.  Regular path needs a correct mapping for
+    # ghost-feature gather but atom-map is absent and we deliberately do
+    # not build one ourselves.  Must fail fast with an actionable message.
+    lammps_no_atom_map.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps_no_atom_map.pair_coeff("* *")
+    with pytest.raises(Exception, match=r"atom_modify map yes"):
+        lammps_no_atom_map.run(0)
+
+
+def test_pair_deepmd_with_comm(lammps) -> None:
+    # Cell C single-rank: use_loc_mapping=False (deeppot_dpa3_mpi.pt2,
+    # has with-comm artifact), atom_modify map yes, single-rank.
+    # Dispatch picks the regular path because nswap==0; the regular
+    # artifact uses the correct mapping built from LAMMPS atom-map.
+    # Forces must match the same baseline as the use_loc_mapping=True
+    # variant (gen_dpa3.py exports both .pt2s from identical weights).
+    lammps.pair_style(f"deepmd {pb_file_mpi.resolve()}")
+    lammps.pair_coeff("* *")
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+
+
+def test_pair_deepmd_with_comm_no_atom_map_fails_fast(lammps_no_atom_map) -> None:
+    # Cell D: use_loc_mapping=False (with-comm artifact available),
+    # atom_modify map no, single-rank.  Despite the with-comm artifact
+    # being available, single-rank PBC has empty CommBrick sendlist
+    # (nswap==0), so border_op cannot fill ghost features.  Must fail
+    # fast with the same single-rank message as cell B.
+    lammps_no_atom_map.pair_style(f"deepmd {pb_file_mpi.resolve()}")
+    lammps_no_atom_map.pair_coeff("* *")
+    with pytest.raises(Exception, match=r"atom_modify map yes"):
+        lammps_no_atom_map.run(0)
+
+
 def test_pair_deepmd_virial(lammps) -> None:
     lammps.pair_style(f"deepmd {pb_file.resolve()}")
     lammps.pair_coeff("* *")
@@ -361,6 +416,8 @@ def _run_mpi_subprocess(
     data_path: Path | None = None,
     processors: str | None = None,
     runner_args: list[str] | None = None,
+    pb_path: Path | None = None,
+    capture: bool = False,
 ) -> dict:
     """Helper: invoke run_mpi_pair_deepmd_dpa3_pt2.py under
     ``mpirun -n <nprocs>`` and return
@@ -380,6 +437,8 @@ def _run_mpi_subprocess(
     """
     if data_path is None:
         data_path = data_file
+    if pb_path is None:
+        pb_path = pb_file_mpi
     with tempfile.NamedTemporaryFile(mode="r", suffix=".out", delete=False) as f:
         out_path = f.name
     try:
@@ -390,7 +449,7 @@ def _run_mpi_subprocess(
             sys.executable,
             str(Path(__file__).parent / "run_mpi_pair_deepmd_dpa3_pt2.py"),
             str(data_path.resolve()),
-            str(pb_file_mpi.resolve()),
+            str(pb_path.resolve()),
             out_path,
         ]
         if processors is not None:
@@ -401,6 +460,15 @@ def _run_mpi_subprocess(
             argv.extend(extra_args)
         if runner_args:
             argv.extend(runner_args)
+        if capture:
+            # Return raw process info instead of parsing output — used by
+            # tests that expect the subprocess to fail (the fail-fast cases).
+            proc = sp.run(argv, capture_output=True, text=True)
+            return {
+                "returncode": proc.returncode,
+                "stdout": proc.stdout,
+                "stderr": proc.stderr,
+            }
         sp.check_call(argv)
         with open(out_path) as fh:
             lines = fh.read().strip().splitlines()
@@ -468,6 +536,56 @@ def test_pair_deepmd_mpi_dpa3() -> None:
     )
 
 
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepmd_mpi_no_with_comm_fails_fast() -> None:
+    """Cell B-mr: use_loc_mapping=True (no with-comm artifact) under
+    ``mpirun -n 2``.  Multi-rank dispatch cannot use the regular path
+    (mapping is unreliable across ranks) and there is no with-comm
+    artifact to fall back to.  Must fail-fast with a message naming
+    ``use_loc_mapping=False`` as the user-facing fix.
+    """
+    out = _run_mpi_subprocess(pb_path=pb_file, capture=True)
+    assert out["returncode"] != 0, (
+        "Expected subprocess to fail-fast for "
+        "use_loc_mapping=True .pt2 + multi-rank, but it exited 0."
+    )
+    combined = out["stdout"] + out["stderr"]
+    assert "use_loc_mapping=False" in combined, (
+        "Expected error message mentioning use_loc_mapping=False, got:\n"
+        + combined[-500:]
+    )
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepmd_mpi_no_atom_map() -> None:
+    """Cell D-mr: use_loc_mapping=False (with-comm artifact present)
+    under ``mpirun -n 2`` WITHOUT ``atom_modify map yes``.  Multi-rank
+    dispatch routes to the with-comm artifact whose graph fills ghost
+    features via ``border_op`` and does not consume the mapping tensor
+    — so atom-map is not required.  Forces must match the same baseline
+    as the with-atom-map multi-rank run (cell C-mr).
+    """
+    out = _run_mpi_subprocess(runner_args=["--no-atom-map"])
+    assert out["pe"] == pytest.approx(expected_e)
+    for ii in range(6):
+        np.testing.assert_allclose(
+            out["forces"][ii],
+            expected_f[ii],
+            atol=1e-8,
+            rtol=0,
+        )
+
+
 @pytest.mark.skipif(
     shutil.which("mpirun") is None, reason="MPI is not installed on this system"
 )

From 69b519dbc495508aa3ce35fe00a4f64049e8bac7 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 08:54:31 +0800
Subject: [PATCH 2/8] fix(pt_expt): tighten multi-rank fail-fast + drop invalid
 atom_modify map no
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address Codex review (PR #5450) and CI failure:

1. **Codex P1 (DeepPotPTExpt.cc, DeepSpinPTExpt.cc)**: tighten the
   fail-fast predicate from
     ``!use_with_comm && !atom_map_present``
   to
     ``!use_with_comm && (multi_rank || !atom_map_present)``
   Previously a hypothetical caller that populated ``lmp_list.mapping``
   on a multi-rank invocation (any C++ user bypassing pair_deepmd's
   ``nprocs == 1`` gate, or a future patch that propagated atom-map
   under multi-rank) could slip past the guard and silently use the
   regular path's rank-local mapping to gather cross-rank ghosts —
   exactly the silent-corruption class this PR is meant to close.
   The new predicate fails-fast multi-rank unconditionally on
   atom_map_present.

2. **CI failure**: LAMMPS rejects ``atom_modify map no`` ("Illegal
   atom_modify map command argument no", src/atom.cpp:870). The valid
   keywords are ``array | hash | yes``; to leave the atom-map
   disabled, the command must simply be omitted (default for
   ``atom_style atomic``). Update both the pytest ``_lammps`` helper
   and the MPI runner script to omit the line when ``atom_map="no"``
   / ``--no-atom-map`` is set instead of emitting the invalid syntax.

Local verification: ``*PtExpt*`` ctest filter — 160/160 PASSED.

Known limitation: the codex P1 fix is defensive — no in-tree LAMMPS
caller can reach the ``multi_rank && atom_map_present`` combination
because pair_deepmd.cpp:242 gates atom-map propagation on
``nprocs == 1``. A C++ gtest could synthesize the case via
``inlist.nswap = 1`` + ``inlist.mapping = …`` (precedent:
test_with_comm_load_failure_ptexpt.cc); deferred from this commit
to keep the PR focused.
---
 source/api_cc/src/DeepPotPTExpt.cc               | 14 ++++++++++----
 source/api_cc/src/DeepSpinPTExpt.cc              |  7 +++++--
 source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py |  5 +++--
 source/lmp/tests/test_lammps_dpa3_pt2.py         |  6 +++++-
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc
index 7953e45944..33b2ba3d15 100644
--- a/source/api_cc/src/DeepPotPTExpt.cc
+++ b/source/api_cc/src/DeepPotPTExpt.cc
@@ -385,12 +385,18 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
   // Fail-fast conditions:
   //   - ``has_message_passing_``: only models whose regular graph
   //     actually consumes ``mapping`` for ghost-feature gather can be
-  //     silently corrupted by an absent mapping.  Skip for non-GNN
-  //     models (se_e2_a, DPA1, ...).
+  //     silently corrupted by a missing or invalid mapping.  Skip for
+  //     non-GNN models (se_e2_a, DPA1, ...).
   //   - ``nghost > 0``: with no ghost atoms, identity mapping over
   //     [0, nloc) is trivially correct.
-  if (has_message_passing_ && !use_with_comm && !atom_map_present &&
-      nghost > 0) {
+  //   - Multi-rank without with-comm: the regular path's mapping can
+  //     never resolve cross-rank ghosts (atom-map is rank-local), so
+  //     fail-fast UNCONDITIONALLY on atom_map_present.
+  //   - Single-rank without atom-map: the identity fallback gives wrong
+  //     ghost indices, so fail-fast only when atom_map_present is false.
+  bool needs_fail_fast = has_message_passing_ && !use_with_comm && nghost > 0 &&
+                         (multi_rank || !atom_map_present);
+  if (needs_fail_fast) {
     if (multi_rank) {
       throw deepmd::deepmd_exception(
           "Multi-rank LAMMPS .pt2 inference requires the model to be "
diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc
index 0458ecf536..3a44974f85 100644
--- a/source/api_cc/src/DeepSpinPTExpt.cc
+++ b/source/api_cc/src/DeepSpinPTExpt.cc
@@ -385,8 +385,11 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
   // See DeepPotPTExpt::compute_inner for the rationale on these guards.
-  if (has_message_passing_ && !use_with_comm && !atom_map_present &&
-      nghost > 0) {
+  // Multi-rank fail-fast is unconditional on atom_map_present because
+  // atom-map can only resolve rank-local ghosts.
+  bool needs_fail_fast = has_message_passing_ && !use_with_comm && nghost > 0 &&
+                         (multi_rank || !atom_map_present);
+  if (needs_fail_fast) {
     if (multi_rank) {
       throw deepmd::deepmd_exception(
           "Multi-rank LAMMPS .pt2 inference requires the model to be "
diff --git a/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py b/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py
index 8a34d0fe1c..f5ccb4a377 100644
--- a/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py
+++ b/source/lmp/tests/run_mpi_pair_deepmd_dpa3_pt2.py
@@ -134,9 +134,10 @@
 # through the regular artifact of a use_loc_mapping=False .pt2: the
 # C++ side needs the LAMMPS global-id->local-index map to build the
 # ``mapping`` tensor. It is harmless under multi-rank.  The
-# ``--no-atom-map`` flag skips this line so the no-atom-map fallback
+# ``--no-atom-map`` flag omits this line so the no-atom-map fallback
 # (multi-rank with-comm path) and fail-fast (no with-comm artifact)
-# branches can be exercised.
+# branches can be exercised — LAMMPS rejects ``atom_modify map no``,
+# so omitting the command is the only way to leave the map disabled.
 if not args.no_atom_map:
     lammps.atom_modify("map yes")
 lammps.neighbor("2.0 bin")
diff --git a/source/lmp/tests/test_lammps_dpa3_pt2.py b/source/lmp/tests/test_lammps_dpa3_pt2.py
index e4f2eefecb..a55d80561c 100644
--- a/source/lmp/tests/test_lammps_dpa3_pt2.py
+++ b/source/lmp/tests/test_lammps_dpa3_pt2.py
@@ -185,7 +185,11 @@ def _lammps(data_file, units="metal", atom_map: str = "yes") -> PyLammps:
     lammps.units(units)
     lammps.boundary("p p p")
     lammps.atom_style("atomic")
-    lammps.atom_modify(f"map {atom_map}")
+    # LAMMPS rejects ``atom_modify map no``; the supported way to leave
+    # the atom-map disabled is to simply omit the command (default for
+    # ``atom_style atomic``).
+    if atom_map != "no":
+        lammps.atom_modify(f"map {atom_map}")
     if units == "metal" or units == "real":
         lammps.neighbor("2.0 bin")
     elif units == "si":

From 7be4cd013577a52ae503f9cd7abaff9adfa7ef26 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 09:02:06 +0800
Subject: [PATCH 3/8] refactor(pt_expt): restructure fail-fast as two
 table-mirrored throws
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per review feedback: the previous predicate
  has_message_passing_ && !use_with_comm && nghost > 0 &&
      (multi_rank || !atom_map_present)
collapsed two distinct conditions into one boolean, making it hard to
trace which matrix cell each throw corresponds to.

Restructure as two side-by-side throws, each mirroring one row of the
PR's coverage matrix:

  if (has_message_passing_ && nghost > 0) {
    if (multi_rank && !has_comm_artifact_)  throw multi-rank msg;  // B-mr
    if (!multi_rank && !atom_map_present)   throw single-rank msg; // B / D
  }

Functionally identical to the prior boolean (verified equivalence by
truth table; 160/160 ptexpt ctests pass), but the structure now maps
1-to-1 to the PR description's table.  Applied symmetrically in
DeepPotPTExpt.cc and DeepSpinPTExpt.cc.

The four "no-throw" cells fall out naturally:
- non-GNN (has_message_passing_=false)             → no throw
- nghost==0 (NoPbc / isolated cluster)             → no throw
- multi-rank && has_comm_artifact_ (cells C-mr, D-mr) → no throw (with-comm)
- single-rank && atom_map_present (cells A, C)     → no throw (regular w/ map)
---
 source/api_cc/src/DeepPotPTExpt.cc  | 29 ++++++++++++-----------------
 source/api_cc/src/DeepSpinPTExpt.cc | 13 +++++--------
 2 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc
index 33b2ba3d15..b4b830628a 100644
--- a/source/api_cc/src/DeepPotPTExpt.cc
+++ b/source/api_cc/src/DeepPotPTExpt.cc
@@ -382,28 +382,23 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
   bool multi_rank = (lmp_list.nswap > 0);
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
-  // Fail-fast conditions:
-  //   - ``has_message_passing_``: only models whose regular graph
-  //     actually consumes ``mapping`` for ghost-feature gather can be
-  //     silently corrupted by a missing or invalid mapping.  Skip for
-  //     non-GNN models (se_e2_a, DPA1, ...).
-  //   - ``nghost > 0``: with no ghost atoms, identity mapping over
-  //     [0, nloc) is trivially correct.
-  //   - Multi-rank without with-comm: the regular path's mapping can
-  //     never resolve cross-rank ghosts (atom-map is rank-local), so
-  //     fail-fast UNCONDITIONALLY on atom_map_present.
-  //   - Single-rank without atom-map: the identity fallback gives wrong
-  //     ghost indices, so fail-fast only when atom_map_present is false.
-  bool needs_fail_fast = has_message_passing_ && !use_with_comm && nghost > 0 &&
-                         (multi_rank || !atom_map_present);
-  if (needs_fail_fast) {
-    if (multi_rank) {
+  // Decision matrix (see PR #5450 description):
+  //   non-GNN model (has_message_passing_ == false): regular path is
+  //                                                  always safe.
+  //   nghost == 0 (NoPbc, isolated cluster):         always safe.
+  //   GNN model, multi-rank:    requires has_comm_artifact_  (cell C-mr / D-mr)
+  //                             else fail-fast               (cell B-mr)
+  //   GNN model, single-rank:   requires atom_map_present    (cell A / C)
+  //                             else fail-fast               (cell B / D)
+  if (has_message_passing_ && nghost > 0) {
+    if (multi_rank && !has_comm_artifact_) {
       throw deepmd::deepmd_exception(
           "Multi-rank LAMMPS .pt2 inference requires the model to be "
           "exported with `use_loc_mapping=False`, which compiles a "
           "with-comm artifact for cross-rank ghost-feature exchange. "
           "Re-export the model with use_loc_mapping=False and try again.");
-    } else {
+    }
+    if (!multi_rank && !atom_map_present) {
       throw deepmd::deepmd_exception(
           "Single-rank LAMMPS .pt2 inference requires `atom_modify map "
           "yes` in the LAMMPS input (so InputNlist.mapping is populated "
diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc
index 3a44974f85..a840be9741 100644
--- a/source/api_cc/src/DeepSpinPTExpt.cc
+++ b/source/api_cc/src/DeepSpinPTExpt.cc
@@ -384,19 +384,16 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
   bool multi_rank = (lmp_list.nswap > 0);
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
-  // See DeepPotPTExpt::compute_inner for the rationale on these guards.
-  // Multi-rank fail-fast is unconditional on atom_map_present because
-  // atom-map can only resolve rank-local ghosts.
-  bool needs_fail_fast = has_message_passing_ && !use_with_comm && nghost > 0 &&
-                         (multi_rank || !atom_map_present);
-  if (needs_fail_fast) {
-    if (multi_rank) {
+  // See DeepPotPTExpt::compute_inner for the decision-matrix rationale.
+  if (has_message_passing_ && nghost > 0) {
+    if (multi_rank && !has_comm_artifact_) {
       throw deepmd::deepmd_exception(
           "Multi-rank LAMMPS .pt2 inference requires the model to be "
           "exported with `use_loc_mapping=False`, which compiles a "
           "with-comm artifact for cross-rank ghost-feature exchange. "
           "Re-export the model with use_loc_mapping=False and try again.");
-    } else {
+    }
+    if (!multi_rank && !atom_map_present) {
       throw deepmd::deepmd_exception(
           "Single-rank LAMMPS .pt2 inference requires `atom_modify map "
           "yes` in the LAMMPS input (so InputNlist.mapping is populated "

From 397a5233782bfe7f481a521f83851fc7f0c46f32 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 09:19:36 +0800
Subject: [PATCH 4/8] test(lmp,infer): add spin four-cell coverage matrix
 (A/B/B-mr/C/D/D-mr)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror the non-spin DPA3 coverage matrix in ``test_lammps_dpa3_pt2.py``
for the spin path (DeepSpinPTExpt / pair_deepspin), ensuring the
fail-fast guard against silent corruption fires symmetrically for
the spin GNN.

* source/tests/infer/gen_spin.py — add ``_build_dpa3_single_yaml``
  producing a DPA3 spin model with ``use_loc_mapping=True``. Together
  with the existing ``deeppot_dpa3_spin_mpi.pt2`` (use_loc_mapping=
  False), this covers both with-comm and no-with-comm spin variants
  needed for cells A/B/B-mr (single-artifact) and C/C-mr/D/D-mr
  (dual-artifact).

* source/lmp/tests/run_mpi_pair_deepmd_spin_dpa3_pt2.py — add
  ``--no-atom-map`` flag that omits ``atom_modify map yes`` from the
  LAMMPS input (mirrors the non-spin runner; LAMMPS rejects
  ``atom_modify map no`` so omission is the only valid path).

* source/lmp/tests/test_lammps_spin_dpa3_pt2.py — extend
  ``_run_mpi_subprocess`` with ``pb_path`` and ``capture`` parameters
  (mirrors the non-spin helper). Add six new tests covering the
  spin matrix:

    test_pair_deepspin_single_artifact_with_atom_map      (cell A)
    test_pair_deepspin_no_atom_map_fails_fast             (cell B)
    test_pair_deepspin_mpi_no_with_comm_fails_fast        (cell B-mr)
    test_pair_deepspin_with_comm_single_atom_map          (cell C)
    test_pair_deepspin_with_comm_no_atom_map_fails_fast   (cell D)
    test_pair_deepspin_mpi_no_atom_map                    (cell D-mr)

  Cell C-mr remains covered by the existing
  ``test_pair_deepmd_mpi_dpa3_spin``.

Local verification: gen_spin.py exit 0 produces ``deeppot_dpa3_spin.pt2``
with metadata ``has_message_passing=true, has_comm_artifact=false,
is_spin=true`` — exactly the single-artifact-GNN-spin case for cells
A/B/B-mr. pytest --collect-only reports 9 tests total (3 existing +
6 new). Negative cells (B/B-mr/D) are CI-only — require LAMMPS-with-
deepmd runtime.
---
 .../run_mpi_pair_deepmd_spin_dpa3_pt2.py      |  11 +-
 source/lmp/tests/test_lammps_spin_dpa3_pt2.py | 174 +++++++++++++++++-
 source/tests/infer/gen_spin.py                |  80 ++++++++
 3 files changed, 263 insertions(+), 2 deletions(-)

diff --git a/source/lmp/tests/run_mpi_pair_deepmd_spin_dpa3_pt2.py b/source/lmp/tests/run_mpi_pair_deepmd_spin_dpa3_pt2.py
index 3637238968..5befcf0f79 100644
--- a/source/lmp/tests/run_mpi_pair_deepmd_spin_dpa3_pt2.py
+++ b/source/lmp/tests/run_mpi_pair_deepmd_spin_dpa3_pt2.py
@@ -76,6 +76,14 @@
     help="Optional mass for LAMMPS atom type 3 (and any higher types). "
     "Used by the NULL-type fixture; ignored when only 2 types exist.",
 )
+parser.add_argument(
+    "--no-atom-map",
+    action="store_true",
+    help="Omit ``atom_modify map yes`` from the LAMMPS input.  Used by "
+    "the spin no-atom-map fail-fast / with-comm-fallback tests; LAMMPS "
+    "rejects ``atom_modify map no`` so omitting the command is the only "
+    "way to leave the map disabled.",
+)
 args = parser.parse_args()
 
 lammps = PyLammps()
@@ -83,7 +91,8 @@
 lammps.units("metal")
 lammps.boundary("p p p")
 lammps.atom_style("spin")
-lammps.atom_modify("map yes")
+if not args.no_atom_map:
+    lammps.atom_modify("map yes")
 lammps.neighbor("2.0 bin")
 lammps.neigh_modify("every 10 delay 0 check no")
 lammps.read_data(args.DATAFILE)
diff --git a/source/lmp/tests/test_lammps_spin_dpa3_pt2.py b/source/lmp/tests/test_lammps_spin_dpa3_pt2.py
index 7c7c5787a7..5e6a5fbfb5 100644
--- a/source/lmp/tests/test_lammps_spin_dpa3_pt2.py
+++ b/source/lmp/tests/test_lammps_spin_dpa3_pt2.py
@@ -56,6 +56,12 @@
     / "infer"
     / "deeppot_dpa3_spin_mpi.pt2"
 )
+# Single-artifact DPA3 spin fixture (use_loc_mapping=True; no with-comm
+# artifact).  Counterpart to ``pb_file_mpi`` for the spin fail-fast
+# cells where the C++ side has no fallback to border_op.
+pb_file_single = (
+    Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot_dpa3_spin.pt2"
+)
 data_file = Path(__file__).parent / "data_dpa3_spin_pt2.lmp"
 # Elongated-box fixture for the spin empty-subdomain MPI test: x is
 # extended to 30 A while atoms remain in x in [3, 13]. Combined with
@@ -140,6 +146,8 @@ def _run_mpi_subprocess(
     processors: str | None = None,
     data_path: Path | None = None,
     runner_args: list[str] | None = None,
+    pb_path: Path | None = None,
+    capture: bool = False,
 ) -> dict:
     """Run ``run_mpi_pair_deepmd_spin_dpa3_pt2.py`` under
     ``mpirun -n <nprocs>`` and return
@@ -152,6 +160,8 @@ def _run_mpi_subprocess(
     """
     if data_path is None:
         data_path = data_file
+    if pb_path is None:
+        pb_path = pb_file_mpi
     with tempfile.NamedTemporaryFile(mode="r", suffix=".out", delete=False) as f:
         out_path = f.name
     try:
@@ -162,7 +172,7 @@ def _run_mpi_subprocess(
             sys.executable,
             str(Path(__file__).parent / "run_mpi_pair_deepmd_spin_dpa3_pt2.py"),
             str(data_path.resolve()),
-            str(pb_file_mpi.resolve()),
+            str(pb_path.resolve()),
             out_path,
         ]
         if processors is not None:
@@ -173,6 +183,15 @@ def _run_mpi_subprocess(
             argv.extend(extra_args)
         if runner_args:
             argv.extend(runner_args)
+        if capture:
+            # Used by fail-fast tests: return raw subprocess info instead of
+            # parsing output (the subprocess is expected to exit non-zero).
+            proc = sp.run(argv, capture_output=True, text=True)
+            return {
+                "returncode": proc.returncode,
+                "stdout": proc.stdout,
+                "stderr": proc.stderr,
+            }
         sp.check_call(argv)
         with open(out_path) as fh:
             lines = fh.read().strip().splitlines()
@@ -302,3 +321,156 @@ def test_pair_deepmd_mpi_dpa3_spin_null_type() -> None:
     # real Ni-O atoms).
     np.testing.assert_array_equal(out_mpi["forces"][4:], np.zeros((2, 3)))
     np.testing.assert_array_equal(out_mpi["force_mag"][4:], np.zeros((2, 3)))
+
+
+# ---------------------------------------------------------------------------
+# Four-cell coverage matrix for the spin path — mirrors the non-spin matrix
+# in ``test_lammps_dpa3_pt2.py``.  Verifies the fail-fast in
+# ``DeepSpinPTExpt::compute_inner`` against the silent-corruption bug.
+#
+#   Cell  use_loc_mapping  atom-map  nprocs  Outcome
+#   ----  ---------------  --------  ------  -------------------------------
+#   A     True             yes       1       succeeds (regular w/ map)
+#   B     True             no        1       fail-fast (single-rank msg)
+#   B-mr  True             any       >1      fail-fast (multi-rank msg)
+#   C     False            yes       1       succeeds (regular w/ map; nswap=0)
+#   D     False            no        1       fail-fast (single-rank msg)
+#   D-mr  False            no        >1      succeeds (with-comm; border_op)
+#
+# Cell C-mr is already covered by ``test_pair_deepmd_mpi_dpa3_spin`` above.
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepspin_single_artifact_with_atom_map() -> None:
+    """Cell A (spin): use_loc_mapping=True, single-rank, atom-map yes
+    -> regular path with correct mapping, runs cleanly.
+    """
+    out = _run_mpi_subprocess(nprocs=1, pb_path=pb_file_single)
+    # No hardcoded reference here — we only assert the run completes
+    # and produces finite numbers.  Numerical correctness of the
+    # single-artifact spin GNN is validated by the eager-parity test
+    # in source/tests/pt_expt/model/.
+    assert np.isfinite(out["pe"])
+    assert np.all(np.isfinite(out["forces"]))
+    assert np.all(np.isfinite(out["force_mag"]))
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepspin_no_atom_map_fails_fast() -> None:
+    """Cell B (spin): use_loc_mapping=True, single-rank, no atom-map
+    -> fail-fast with the single-rank message (no with-comm fallback).
+    """
+    out = _run_mpi_subprocess(
+        nprocs=1, pb_path=pb_file_single, runner_args=["--no-atom-map"], capture=True
+    )
+    assert out["returncode"] != 0, (
+        "Expected subprocess to fail-fast for single-rank spin GNN "
+        "without atom-map, but it exited 0."
+    )
+    combined = out["stdout"] + out["stderr"]
+    assert "atom_modify map yes" in combined, (
+        "Expected single-rank fail-fast message, got:\n" + combined[-500:]
+    )
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepspin_mpi_no_with_comm_fails_fast() -> None:
+    """Cell B-mr (spin): use_loc_mapping=True (no with-comm artifact),
+    multi-rank -> fail-fast with the multi-rank message.  atom-map
+    setting is irrelevant: ``pair_deepspin`` never propagates atom-map
+    for multi-rank, and the new predicate fails-fast unconditionally
+    on atom_map_present when multi_rank && !has_comm_artifact.
+    """
+    out = _run_mpi_subprocess(nprocs=2, pb_path=pb_file_single, capture=True)
+    assert out["returncode"] != 0, (
+        "Expected subprocess to fail-fast for multi-rank spin GNN "
+        "without with-comm artifact, but it exited 0."
+    )
+    combined = out["stdout"] + out["stderr"]
+    assert "use_loc_mapping=False" in combined, (
+        "Expected multi-rank fail-fast message, got:\n" + combined[-500:]
+    )
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepspin_with_comm_single_atom_map() -> None:
+    """Cell C (spin): use_loc_mapping=False (has with-comm artifact),
+    single-rank, atom-map yes -> regular path takes the artifact's
+    non-comm trace (nswap=0) and uses the LAMMPS atom-map.
+    """
+    out = _run_mpi_subprocess(nprocs=1, pb_path=pb_file_mpi)
+    assert np.isfinite(out["pe"])
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepspin_with_comm_no_atom_map_fails_fast() -> None:
+    """Cell D (spin): use_loc_mapping=False (with-comm artifact),
+    single-rank, no atom-map -> fail-fast.  Even though with-comm is
+    available, single-rank LAMMPS has nswap=0, so border_op cannot
+    drive the per-layer ghost exchange; the regular path needs the
+    mapping but atom-map is absent.
+    """
+    out = _run_mpi_subprocess(
+        nprocs=1, pb_path=pb_file_mpi, runner_args=["--no-atom-map"], capture=True
+    )
+    assert out["returncode"] != 0, (
+        "Expected subprocess to fail-fast for single-rank spin GNN + "
+        "with-comm artifact + no atom-map, but it exited 0."
+    )
+    combined = out["stdout"] + out["stderr"]
+    assert "atom_modify map yes" in combined, (
+        "Expected single-rank fail-fast message, got:\n" + combined[-500:]
+    )
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+def test_pair_deepspin_mpi_no_atom_map() -> None:
+    """Cell D-mr (spin): use_loc_mapping=False (with-comm artifact),
+    multi-rank, no atom-map -> succeeds.  The with-comm path drives
+    ghost-feature exchange via ``deepmd_export::border_op`` and does
+    NOT consume the mapping tensor for ghost gather, so atom-map is
+    unnecessary.  Forces / force_mag must match the same-archive
+    atom-map-yes multi-rank baseline.
+    """
+    out_no_map = _run_mpi_subprocess(
+        nprocs=2, pb_path=pb_file_mpi, runner_args=["--no-atom-map"]
+    )
+    out_baseline = _run_mpi_subprocess(nprocs=2, pb_path=pb_file_mpi)
+    assert out_no_map["pe"] == pytest.approx(out_baseline["pe"], rel=1e-10, abs=1e-12)
+    np.testing.assert_allclose(
+        out_no_map["forces"], out_baseline["forces"], atol=1e-8, rtol=0
+    )
+    np.testing.assert_allclose(
+        out_no_map["force_mag"], out_baseline["force_mag"], atol=1e-8, rtol=0
+    )
diff --git a/source/tests/infer/gen_spin.py b/source/tests/infer/gen_spin.py
index c17546504b..b08d45060d 100644
--- a/source/tests/infer/gen_spin.py
+++ b/source/tests/infer/gen_spin.py
@@ -144,6 +144,68 @@ def _build_dpa3_mpi_yaml(yaml_path: str) -> None:
     save_dp_model(yaml_path, data)
 
 
+def _build_dpa3_single_yaml(yaml_path: str) -> None:
+    """Build a DPA3 spin model with ``use_loc_mapping=True`` — the
+    single-artifact GNN counterpart to ``_build_dpa3_mpi_yaml``.
+
+    ``use_loc_mapping=True`` keeps per-layer messaging local to each
+    rank, so no with-comm AOTI artifact is needed for single-rank
+    inference.  But the regular path still consumes ``mapping`` to
+    gather ghost features, so this fixture is the canonical test
+    case for the cells where the C++ ``DeepSpinPTExpt`` fail-fast
+    must fire (single-rank without atom-map, or multi-rank without
+    a with-comm artifact).
+    """
+    from deepmd.dpmodel.model.model import (
+        get_model,
+    )
+    from deepmd.dpmodel.utils.serialization import (
+        save_dp_model,
+    )
+
+    config = {
+        "type_map": ["Ni", "O"],
+        "descriptor": {
+            "type": "dpa3",
+            "repflow": {
+                "n_dim": 8,
+                "e_dim": 6,
+                "a_dim": 4,
+                "nlayers": 1,
+                "e_rcut": 4.0,
+                "e_rcut_smth": 0.5,
+                "e_sel": 8,
+                "a_rcut": 3.5,
+                "a_rcut_smth": 0.5,
+                "a_sel": 4,
+                "axis_neuron": 4,
+                "update_angle": False,
+            },
+            "use_loc_mapping": True,
+            "precision": "float64",
+            "seed": 1,
+        },
+        "fitting_net": {"neuron": [5, 5, 5], "resnet_dt": True, "seed": 1},
+        "spin": {"use_spin": [True, False], "virtual_scale": [0.3140, 0.0]},
+    }
+
+    model = get_model(copy.deepcopy(config))
+    model_dict = model.serialize()
+
+    data = {
+        "model": model_dict,
+        "model_def_script": config,
+        "backend": "dpmodel",
+        "software": "deepmd-kit",
+        "version": "3.0.0",
+    }
+
+    print(  # noqa: T201
+        f"Building single-artifact DPA3 spin dpmodel and saving to {yaml_path} ..."
+    )
+    save_dp_model(yaml_path, data)
+
+
 def main():
     from deepmd.entrypoints.convert_backend import (
         convert_backend,
@@ -162,6 +224,12 @@ def main():
     yaml_dpa3_path = os.path.join(base_dir, "deeppot_dpa3_spin_mpi.yaml")
     pt2_dpa3_path = os.path.join(base_dir, "deeppot_dpa3_spin_mpi.pt2")
 
+    # Single-artifact GNN spin variant (DPA3 + use_loc_mapping=True).
+    # No with-comm artifact; needed by tests covering the spin fail-fast
+    # cells in test_lammps_spin_dpa3_pt2.py.
+    yaml_dpa3_single_path = os.path.join(base_dir, "deeppot_dpa3_spin.yaml")
+    pt2_dpa3_single_path = os.path.join(base_dir, "deeppot_dpa3_spin.pt2")
+
     # ---- 1. Build .yamls if they don't exist ----
     if not os.path.exists(yaml_path):
         _build_yaml(yaml_path)
@@ -173,6 +241,11 @@ def main():
     else:
         print(f"Using existing {yaml_dpa3_path}")  # noqa: T201
 
+    if not os.path.exists(yaml_dpa3_single_path):
+        _build_dpa3_single_yaml(yaml_dpa3_single_path)
+    else:
+        print(f"Using existing {yaml_dpa3_single_path}")  # noqa: T201
+
     # ---- 2. Convert .yaml -> .pth and .yaml -> .pt2 ----
     # Import deepmd.pt to register the backend (needed for convert_backend)
     import deepmd.pt  # noqa: F401
@@ -185,6 +258,13 @@ def main():
     print(f"Converting to {pt2_path} ...")  # noqa: T201
     convert_backend(INPUT=yaml_path, OUTPUT=pt2_path, atomic_virial=True)
 
+    print(f"Converting to {pt2_dpa3_single_path} ...")  # noqa: T201
+    convert_backend(
+        INPUT=yaml_dpa3_single_path,
+        OUTPUT=pt2_dpa3_single_path,
+        atomic_virial=True,
+    )
+
     print(f"Converting to {pt2_dpa3_path} ...")  # noqa: T201
     convert_backend(INPUT=yaml_dpa3_path, OUTPUT=pt2_dpa3_path, atomic_virial=True)
 

From 818040f76d4c33237aabffb06efb9ed7f3c59d9b Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 10:00:26 +0800
Subject: [PATCH 5/8] fix(lmp): use lmp_list.nprocs and populate spin atom-map
 for fail-fast

Replaces the ``lmp_list.nswap > 0`` proxy with a direct
``lmp_list.nprocs > 1`` predicate in DeepPotPTExpt / DeepSpinPTExpt
dispatch.  ``atom_style spin`` populates ``nswap`` even in single-rank
runs (to propagate PBC ghost spins), so the proxy was unsound and caused
single-rank spin LAMMPS jobs to be misclassified as multi-rank by the
fail-fast guard.

Also populates ``lmp_list.mapping`` from the LAMMPS atom-map in
``pair_deepspin.cpp`` (mirroring ``pair_deepmd.cpp``), so single-rank
spin .pt2 GNN inference can resolve ghost indices.

Mechanism:
- Add ``int nprocs`` field + ``set_nprocs`` to ``InputNlist`` (and the
  matching ``DP_NlistSetNprocs`` / ``deepmd::hpp::InputNlist::set_nprocs``
  wrappers) so LAMMPS pair styles can forward ``comm->nprocs``.
- ``pair_deepmd.cpp``, ``pair_deepspin.cpp``, and ``fix_dplr.cpp`` now
  call ``lmp_list.set_nprocs(comm->nprocs)``.
- Refactor the ``serialization.py`` ``has_message_passing`` probe to
  walk ``model -> atomic_model -> descriptor`` (per njzjz-bot review),
  so composite atomic models (e.g. LinearAtomicModel) report the flag.
- Rewrite the stale ``world == nullptr`` carve-out comment in the
  identity-mapping fallback blocks.
---
 deepmd/pt_expt/utils/serialization.py | 31 +++++++++++++++++++++------
 source/api_c/include/c_api.h          | 10 +++++++++
 source/api_c/include/deepmd.hpp       |  5 +++++
 source/api_c/src/c_api.cc             |  1 +
 source/api_cc/src/DeepPotPTExpt.cc    | 28 +++++++++++++-----------
 source/api_cc/src/DeepSpinPTExpt.cc   | 15 ++++++++-----
 source/lib/include/neighbor_list.h    | 15 +++++++++++++
 source/lmp/fix_dplr.cpp               |  1 +
 source/lmp/pair_deepmd.cpp            |  1 +
 source/lmp/pair_deepspin.cpp          | 13 +++++++++++
 10 files changed, 96 insertions(+), 24 deletions(-)

diff --git a/deepmd/pt_expt/utils/serialization.py b/deepmd/pt_expt/utils/serialization.py
index d7cb64bc35..2937a2d93f 100644
--- a/deepmd/pt_expt/utils/serialization.py
+++ b/deepmd/pt_expt/utils/serialization.py
@@ -442,6 +442,7 @@ def _collect_metadata(model: torch.nn.Module, is_spin: bool = False) -> dict:
     # (per-layer ghost-feature MPI exchange via deepmd_export::border_op).
     # The C++ DeepPotPTExpt / DeepSpinPTExpt loaders branch on this flag.
     meta["has_comm_artifact"] = _needs_with_comm_artifact(model)
+
     # Whether the model's regular .pt2 graph consumes the ``mapping``
     # tensor to gather per-layer ghost-atom features from local atoms.
     # Mirrors the descriptor's ``has_message_passing()`` API: True for
@@ -450,14 +451,32 @@ def _collect_metadata(model: torch.nn.Module, is_spin: bool = False) -> dict:
     # The C++ side gates its fail-fast on this — an absent mapping is
     # fatal only for models that would silently corrupt ghost features
     # otherwise.
-    desc = getattr(getattr(model, "atomic_model", None), "descriptor", None)
-    if desc is not None and hasattr(desc, "has_message_passing"):
+    #
+    # Lookup order: model -> atomic_model -> descriptor.  Going through
+    # ``atomic_model.has_message_passing()`` is important for composite
+    # atomic models (e.g. ``LinearAtomicModel`` in DP-ZBL) which don't
+    # expose a single ``.descriptor`` but do aggregate the flag across
+    # their sub-models.  ``descriptor.has_message_passing()`` is the
+    # fallback for any future wrapper that lacks the higher-level
+    # methods.
+    def _probe_has_message_passing(obj: object) -> bool | None:
+        if obj is None or not hasattr(obj, "has_message_passing"):
+            return None
         try:
-            meta["has_message_passing"] = bool(desc.has_message_passing())
+            return bool(obj.has_message_passing())
         except (AttributeError, NotImplementedError):
-            meta["has_message_passing"] = False
-    else:
-        meta["has_message_passing"] = False
+            return None
+
+    result: bool | None = None
+    for obj in (
+        model,
+        getattr(model, "atomic_model", None),
+        getattr(getattr(model, "atomic_model", None), "descriptor", None),
+    ):
+        result = _probe_has_message_passing(obj)
+        if result is not None:
+            break
+    meta["has_message_passing"] = result if result is not None else False
     return meta
 
 
diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index 534ae94403..b623c1f670 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -88,6 +88,16 @@ extern void DP_NlistSetMask(DP_Nlist* nl, int mask);
  **/
 extern void DP_NlistSetMapping(DP_Nlist* nl, int* mapping);
 
+/**
+ * @brief Set the number of MPI ranks for a neighbor list.
+ *
+ * @param nl Neighbor list.
+ * @param nprocs Number of MPI ranks (1 = single-rank).
+ * @since API version 26
+ *
+ **/
+extern void DP_NlistSetNprocs(DP_Nlist* nl, int nprocs);
+
 /**
  * @brief Delete a neighbor list.
  *
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 2f120ee86c..fc71164db6 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -868,6 +868,11 @@ struct InputNlist {
    * @param mapping mapping from all atoms to real atoms, in size nall.
    */
   void set_mapping(int* mapping) { DP_NlistSetMapping(nl, mapping); };
+  /**
+   * @brief Set the number of MPI ranks for this neighbor list.
+   * @param nprocs Number of MPI ranks (1 = single-rank).
+   */
+  void set_nprocs(int nprocs) { DP_NlistSetNprocs(nl, nprocs); };
 };
 
 /**
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index 3646eb33c5..719142681d 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -46,6 +46,7 @@ void DP_NlistSetMask(DP_Nlist* nl, int mask) { nl->nl.set_mask(mask); }
 void DP_NlistSetMapping(DP_Nlist* nl, int* mapping) {
   nl->nl.set_mapping(mapping);
 }
+void DP_NlistSetNprocs(DP_Nlist* nl, int nprocs) { nl->nl.set_nprocs(nprocs); }
 void DP_DeleteNlist(DP_Nlist* nl) { delete nl; }
 
 // DP Base Model
diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc
index b4b830628a..9f417957b9 100644
--- a/source/api_cc/src/DeepPotPTExpt.cc
+++ b/source/api_cc/src/DeepPotPTExpt.cc
@@ -366,11 +366,11 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
           .to(device);
 
   // Dispatch decision: use the with-comm artifact when LAMMPS is running
-  // multi-rank.  ``lmp_list.nswap > 0`` is the proxy for "multi-rank with
-  // cross-domain communication"; in single-rank LAMMPS (processors 1 1 1,
-  // including with PBC) the C++ side sees nswap == 0.  api_cc is not
-  // linked against MPI directly, so we cannot call MPI_Comm_size; the
-  // proxy is set by LAMMPS's CommBrick at setup time.
+  // multi-rank.  ``lmp_list.nprocs > 1`` is the direct predicate;
+  // ``pair_deepmd.cpp`` populates it via ``set_nprocs(comm->nprocs)``.
+  // Earlier drafts used ``nswap > 0`` as a proxy, but that breaks for
+  // ``atom_style spin`` (which emits nswap > 0 even in single-rank to
+  // propagate PBC ghost spins).  ``nprocs`` is unambiguous.
   //
   // The regular artifact uses ``mapping`` to gather ghost-atom features
   // from local-atom embeddings (``index_select(node_ebd[1, nloc, dim],
@@ -379,7 +379,7 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
   // mapping — applies uniformly to every caller (LAMMPS pair, ctest
   // fixtures, direct C++ API users).  Callers that want the regular
   // path must populate ``lmp_list.mapping``.
-  bool multi_rank = (lmp_list.nswap > 0);
+  bool multi_rank = (lmp_list.nprocs > 1);
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
   // Decision matrix (see PR #5450 description):
@@ -428,13 +428,15 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
               .clone()
               .to(device);
     } else {
-      // Identity fallback reached only on the with-comm path (where the
-      // model graph fills ghost features via border_op and ignores this
-      // tensor for ghost gather — see deepmd/pt_expt/descriptor/
-      // repflows.py::_exchange_ghosts) or for trusted direct C++ callers
-      // (world == nullptr, see the dispatch carve-out above).  Any other
-      // path that reaches here would have been rejected by the fail-fast
-      // throw, so identity values are safe.
+      // Identity fallback.  The fail-fast above guarantees we only
+      // reach this branch when one of these is true:
+      //   - The model is non-message-passing (mapping is unused).
+      //   - ``nghost == 0`` (no ghosts to gather, identity is trivially
+      //     correct).
+      //   - ``use_with_comm`` is true (the with-comm graph fills ghost
+      //     features via border_op and ignores this tensor for ghost
+      //     gather — see deepmd/pt_expt/descriptor/
+      //     repflows.py::_exchange_ghosts).
       std::vector<std::int64_t> mapping(nall_real);
       for (int ii = 0; ii < nall_real; ii++) {
         mapping[ii] = ii;
diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc
index a840be9741..4101655eaf 100644
--- a/source/api_cc/src/DeepSpinPTExpt.cc
+++ b/source/api_cc/src/DeepSpinPTExpt.cc
@@ -381,7 +381,11 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
   // ghost→local mapping); multi-rank without a with-comm artifact cannot
   // drive border_op (no inter-rank exchange tensor).  Both unsupported
   // combinations fail-fast for every caller.
-  bool multi_rank = (lmp_list.nswap > 0);
+  // ``nprocs > 1`` is the direct multi-rank predicate (set by
+  // pair_deepspin via ``lmp_list.set_nprocs(comm->nprocs)``).  Earlier
+  // drafts used ``nswap > 0`` as a proxy, but atom_style spin emits
+  // nswap > 0 even in single-rank, so the proxy is unsound.
+  bool multi_rank = (lmp_list.nprocs > 1);
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
   // See DeepPotPTExpt::compute_inner for the decision-matrix rationale.
@@ -423,10 +427,11 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
               .clone()
               .to(device);
     } else {
-      // Identity fallback: only reached on the with-comm path (which
-      // fills ghost features via border_op and ignores this tensor for
-      // ghost gather) or for trusted direct C++ callers (world ==
-      // nullptr).  Other paths were rejected by the fail-fast above.
+      // Identity fallback.  See DeepPotPTExpt::compute_inner for the
+      // invariant rationale: this branch is only reached when the
+      // model is non-message-passing, nghost==0, or use_with_comm is
+      // true (border_op fills ghosts); other configurations were
+      // rejected by the fail-fast above.
       std::vector<std::int64_t> mapping(nall_real);
       for (int ii = 0; ii < nall_real; ii++) {
         mapping[ii] = ii;
diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h
index 5b39ea7454..18796b7762 100644
--- a/source/lib/include/neighbor_list.h
+++ b/source/lib/include/neighbor_list.h
@@ -46,6 +46,12 @@ struct InputNlist {
   int mask = 0xFFFFFFFF;
   /// mapping from all atoms to real atoms, in the size of nall
   int* mapping = nullptr;
+  /// number of MPI ranks (1 = single-rank).  Populated by LAMMPS pair
+  /// styles from ``comm->nprocs``; defaults to 1 for direct C++
+  /// callers that don't set it.  Use this — NOT ``nswap > 0`` — as the
+  /// "is multi-rank?" predicate: ``atom_style spin`` and some other
+  /// LAMMPS configurations populate ``nswap`` even in single-rank.
+  int nprocs = 1;
   InputNlist()
       : inum(0),
         ilist(NULL),
@@ -105,6 +111,15 @@ struct InputNlist {
    * @brief Set mapping for this neighbor list.
    */
   void set_mapping(int* mapping_) { mapping = mapping_; };
+  /**
+   * @brief Set the MPI rank count for this neighbor list.
+   *
+   * Used by ``DeepPotPTExpt`` / ``DeepSpinPTExpt`` to decide whether
+   * the regular or with-comm artifact should run.  Pair styles must
+   * call this with ``comm->nprocs``; without it the C++ side will
+   * treat the call as single-rank.
+   */
+  void set_nprocs(int nprocs_) { nprocs = nprocs_; };
 };
 
 /**
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index e595e1624b..632fb630d5 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -507,6 +507,7 @@ void FixDPLR::pre_force(int vflag) {
   deepmd_compat::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
                                      list->firstneigh);
   lmp_list.set_mask(NEIGHMASK);
+  lmp_list.set_nprocs(comm->nprocs);
   if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
     lmp_list.set_mapping(mapping_vec.data());
   }
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 9ea2021570..da4f013f72 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -239,6 +239,7 @@ void PairDeepMD::compute(int eflag, int vflag) {
         commdata_->firstrecv, commdata_->sendlist, commdata_->sendproc,
         commdata_->recvproc, &world);
     lmp_list.set_mask(NEIGHMASK);
+    lmp_list.set_nprocs(comm->nprocs);
     if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
       lmp_list.set_mapping(mapping_vec.data());
     }
diff --git a/source/lmp/pair_deepspin.cpp b/source/lmp/pair_deepspin.cpp
index eddcb2eef4..8ec9edc997 100644
--- a/source/lmp/pair_deepspin.cpp
+++ b/source/lmp/pair_deepspin.cpp
@@ -201,6 +201,15 @@ void PairDeepSpin::compute(int eflag, int vflag) {
     }
   }
 
+  // mapping (for DPA-2/3 .pt2 GNN models that gather ghost features via
+  // the LAMMPS atom-map; harmless for other models).
+  std::vector<int> mapping_vec(nall, -1);
+  if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
+    for (size_t ii = 0; ii < nall; ++ii) {
+      mapping_vec[ii] = atom->map(atom->tag[ii]);
+    }
+  }
+
   if (do_compute_aparam) {
     make_aparam_from_compute(daparam);
   } else if (aparam.size() > 0) {
@@ -244,6 +253,10 @@ void PairDeepSpin::compute(int eflag, int vflag) {
         commdata_->firstrecv, commdata_->sendlist, commdata_->sendproc,
         commdata_->recvproc, &world);
     lmp_list.set_mask(NEIGHMASK);
+    lmp_list.set_nprocs(comm->nprocs);
+    if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
+      lmp_list.set_mapping(mapping_vec.data());
+    }
     if (single_model || multi_models_no_mod_devi) {
       // cvflag_atom is the right flag for the cvatom matrix
       if (!(eflag_atom || cvflag_atom)) {

From 4fcd312127170b17d16c51a557f2a682403f8597 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 13:35:46 +0800
Subject: [PATCH 6/8] refactor(nlist): move nprocs into InputNlist constructors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the ``set_nprocs(comm->nprocs)`` setter with a constructor
parameter on both ``InputNlist`` overloads (lightweight and comm-aware)
and the matching ``DP_NewNlist`` / ``DP_NewNlist_comm`` C entry points
plus the hpp wrappers.  ``nprocs`` is a conceptual sibling of ``world``
and ``nswap`` and lives more naturally in the initializer than as a
post-construction setter.  Default value 1 keeps direct C++ API
consumers source-compatible — they need not pass anything.

- ``InputNlist`` lightweight ctor: new trailing ``int nprocs_ = 1``.
- ``InputNlist`` comm-aware ctor: new trailing ``int nprocs_ = 1``.
- ``DP_NewNlist`` / ``DP_NewNlist_comm``: new trailing ``int nprocs``
  arg (no default — C API).
- ``deepmd::hpp::InputNlist`` ctors: new trailing ``int nprocs = 1``.
- ``DP_NlistSetNprocs`` / ``set_nprocs`` removed (single-use in-tree).
- ``pair_deepmd.cpp``, ``pair_deepspin.cpp``, ``fix_dplr.cpp`` now pass
  ``comm->nprocs`` directly in the constructor call.
- Update stale comments in ``DeepPotPTExpt.cc`` and ``DeepSpinPTExpt.cc``
  to reference the constructor parameter instead of the removed setter.
---
 source/api_c/include/c_api.h        | 22 +++++++-------------
 source/api_c/include/deepmd.hpp     | 20 ++++++++----------
 source/api_c/src/c_api.cc           | 18 ++++++++--------
 source/api_cc/src/DeepPotPTExpt.cc  |  9 ++++----
 source/api_cc/src/DeepSpinPTExpt.cc |  9 ++++----
 source/lib/include/neighbor_list.h  | 32 ++++++++++++++---------------
 source/lmp/fix_dplr.cpp             |  3 +--
 source/lmp/pair_deepmd.cpp          |  3 +--
 source/lmp/pair_deepspin.cpp        |  3 +--
 9 files changed, 53 insertions(+), 66 deletions(-)

diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index b623c1f670..15b79a34ae 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -27,10 +27,8 @@ typedef struct DP_Nlist DP_Nlist;
  * @param[in] Array stores the core region atom's neighbor index
  * @returns A pointer to the neighbor list.
  **/
-extern DP_Nlist* DP_NewNlist(int inum_,
-                             int* ilist_,
-                             int* numneigh_,
-                             int** firstneigh_);
+extern DP_Nlist* DP_NewNlist(
+    int inum_, int* ilist_, int* numneigh_, int** firstneigh_, int nprocs);
 /**
  * @brief Create a new neighbor list with communication capabilities.
  * @details This function extends DP_NewNlist by adding support for parallel
@@ -52,6 +50,9 @@ extern DP_Nlist* DP_NewNlist(int inum_,
  * each swap.
  * @param[in] world Pointer to the MPI communicator or similar communication
  * world used for the operation.
+ * @param[in] nprocs Number of MPI ranks (1 = single-rank).  Used by
+ * ``DeepPotPTExpt`` / ``DeepSpinPTExpt`` to choose between the regular
+ * and with-comm artifacts.  Defaults to 1 if not supplied.
  * @returns A pointer to the initialized neighbor list with communication
  * capabilities.
  */
@@ -66,7 +67,8 @@ extern DP_Nlist* DP_NewNlist_comm(int inum_,
                                   int** sendlist,
                                   int* sendproc,
                                   int* recvproc,
-                                  void* world);
+                                  void* world,
+                                  int nprocs);
 
 /**
  * @brief Set mask for a neighbor list.
@@ -88,16 +90,6 @@ extern void DP_NlistSetMask(DP_Nlist* nl, int mask);
  **/
 extern void DP_NlistSetMapping(DP_Nlist* nl, int* mapping);
 
-/**
- * @brief Set the number of MPI ranks for a neighbor list.
- *
- * @param nl Neighbor list.
- * @param nprocs Number of MPI ranks (1 = single-rank).
- * @since API version 26
- *
- **/
-extern void DP_NlistSetNprocs(DP_Nlist* nl, int nprocs);
-
 /**
  * @brief Delete a neighbor list.
  *
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index fc71164db6..b07d955ea0 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -809,15 +809,16 @@ struct InputNlist {
         ilist(nullptr),
         numneigh(nullptr),
         firstneigh(nullptr),
-        nl(DP_NewNlist(0, nullptr, nullptr, nullptr)) {
+        nl(DP_NewNlist(0, nullptr, nullptr, nullptr, 1)) {
     DP_CHECK_OK(DP_NlistCheckOK, nl);
   };
-  InputNlist(int inum_, int* ilist_, int* numneigh_, int** firstneigh_)
+  InputNlist(
+      int inum_, int* ilist_, int* numneigh_, int** firstneigh_, int nprocs = 1)
       : inum(inum_),
         ilist(ilist_),
         numneigh(numneigh_),
         firstneigh(firstneigh_),
-        nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_)) {
+        nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_, nprocs)) {
     DP_CHECK_OK(DP_NlistCheckOK, nl);
   };
   InputNlist(int inum_,
@@ -831,7 +832,8 @@ struct InputNlist {
              int** sendlist,
              int* sendproc,
              int* recvproc,
-             void* world)
+             void* world,
+             int nprocs = 1)
       : inum(inum_),
         ilist(ilist_),
         numneigh(numneigh_),
@@ -847,7 +849,8 @@ struct InputNlist {
                             sendlist,
                             sendproc,
                             recvproc,
-                            world)) {};
+                            world,
+                            nprocs)) {};
   ~InputNlist() { DP_DeleteNlist(nl); };
   /// @brief C API neighbor list.
   DP_Nlist* nl;
@@ -868,11 +871,6 @@ struct InputNlist {
    * @param mapping mapping from all atoms to real atoms, in size nall.
    */
   void set_mapping(int* mapping) { DP_NlistSetMapping(nl, mapping); };
-  /**
-   * @brief Set the number of MPI ranks for this neighbor list.
-   * @param nprocs Number of MPI ranks (1 = single-rank).
-   */
-  void set_nprocs(int nprocs) { DP_NlistSetNprocs(nl, nprocs); };
 };
 
 /**
@@ -900,7 +898,7 @@ void inline convert_nlist(InputNlist& to_nlist,
   // delete the original nl
   DP_DeleteNlist(to_nlist.nl);
   to_nlist.nl = DP_NewNlist(to_nlist.inum, to_nlist.ilist, to_nlist.numneigh,
-                            to_nlist.firstneigh);
+                            to_nlist.firstneigh, 1);
 }
 /**
  * @brief Deep Potential Base Model.
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index 719142681d..df1959a013 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -16,12 +16,10 @@ extern "C" {
 DP_Nlist::DP_Nlist() {}
 DP_Nlist::DP_Nlist(deepmd::InputNlist& nl) : nl(nl) {}
 
-DP_Nlist* DP_NewNlist(int inum_,
-                      int* ilist_,
-                      int* numneigh_,
-                      int** firstneigh_) {
-  DP_NEW_OK(DP_Nlist,
-            deepmd::InputNlist nl(inum_, ilist_, numneigh_, firstneigh_);
+DP_Nlist* DP_NewNlist(
+    int inum_, int* ilist_, int* numneigh_, int** firstneigh_, int nprocs) {
+  DP_NEW_OK(DP_Nlist, deepmd::InputNlist nl(inum_, ilist_, numneigh_,
+                                            firstneigh_, nprocs);
             DP_Nlist* new_nl = new DP_Nlist(nl); return new_nl;)
 }
 DP_Nlist* DP_NewNlist_comm(int inum_,
@@ -35,10 +33,11 @@ DP_Nlist* DP_NewNlist_comm(int inum_,
                            int** sendlist,
                            int* sendproc,
                            int* recvproc,
-                           void* world) {
+                           void* world,
+                           int nprocs) {
   deepmd::InputNlist nl(inum_, ilist_, numneigh_, firstneigh_, nswap, sendnum,
-                        recvnum, firstrecv, sendlist, sendproc, recvproc,
-                        world);
+                        recvnum, firstrecv, sendlist, sendproc, recvproc, world,
+                        nprocs);
   DP_Nlist* new_nl = new DP_Nlist(nl);
   return new_nl;
 }
@@ -46,7 +45,6 @@ void DP_NlistSetMask(DP_Nlist* nl, int mask) { nl->nl.set_mask(mask); }
 void DP_NlistSetMapping(DP_Nlist* nl, int* mapping) {
   nl->nl.set_mapping(mapping);
 }
-void DP_NlistSetNprocs(DP_Nlist* nl, int nprocs) { nl->nl.set_nprocs(nprocs); }
 void DP_DeleteNlist(DP_Nlist* nl) { delete nl; }
 
 // DP Base Model
diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc
index 9f417957b9..880eaabeb0 100644
--- a/source/api_cc/src/DeepPotPTExpt.cc
+++ b/source/api_cc/src/DeepPotPTExpt.cc
@@ -367,10 +367,11 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener,
 
   // Dispatch decision: use the with-comm artifact when LAMMPS is running
   // multi-rank.  ``lmp_list.nprocs > 1`` is the direct predicate;
-  // ``pair_deepmd.cpp`` populates it via ``set_nprocs(comm->nprocs)``.
-  // Earlier drafts used ``nswap > 0`` as a proxy, but that breaks for
-  // ``atom_style spin`` (which emits nswap > 0 even in single-rank to
-  // propagate PBC ghost spins).  ``nprocs`` is unambiguous.
+  // LAMMPS pair styles populate it by passing ``comm->nprocs`` to the
+  // ``InputNlist`` constructor.  Earlier drafts used ``nswap > 0`` as a
+  // proxy, but that breaks for ``atom_style spin`` (which emits
+  // nswap > 0 even in single-rank to propagate PBC ghost spins).
+  // ``nprocs`` is unambiguous.
   //
   // The regular artifact uses ``mapping`` to gather ghost-atom features
   // from local-atom embeddings (``index_select(node_ebd[1, nloc, dim],
diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc
index 4101655eaf..4cd7319ff2 100644
--- a/source/api_cc/src/DeepSpinPTExpt.cc
+++ b/source/api_cc/src/DeepSpinPTExpt.cc
@@ -381,10 +381,11 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
   // ghost→local mapping); multi-rank without a with-comm artifact cannot
   // drive border_op (no inter-rank exchange tensor).  Both unsupported
   // combinations fail-fast for every caller.
-  // ``nprocs > 1`` is the direct multi-rank predicate (set by
-  // pair_deepspin via ``lmp_list.set_nprocs(comm->nprocs)``).  Earlier
-  // drafts used ``nswap > 0`` as a proxy, but atom_style spin emits
-  // nswap > 0 even in single-rank, so the proxy is unsound.
+  // ``nprocs > 1`` is the direct multi-rank predicate (LAMMPS pair
+  // styles set it by passing ``comm->nprocs`` to the ``InputNlist``
+  // constructor).  Earlier drafts used ``nswap > 0`` as a proxy, but
+  // atom_style spin emits nswap > 0 even in single-rank, so the proxy
+  // is unsound.
   bool multi_rank = (lmp_list.nprocs > 1);
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h
index 18796b7762..41a5bbcc12 100644
--- a/source/lib/include/neighbor_list.h
+++ b/source/lib/include/neighbor_list.h
@@ -46,9 +46,11 @@ struct InputNlist {
   int mask = 0xFFFFFFFF;
   /// mapping from all atoms to real atoms, in the size of nall
   int* mapping = nullptr;
-  /// number of MPI ranks (1 = single-rank).  Populated by LAMMPS pair
-  /// styles from ``comm->nprocs``; defaults to 1 for direct C++
-  /// callers that don't set it.  Use this — NOT ``nswap > 0`` — as the
+  /// number of MPI ranks (1 = single-rank).  Passed by LAMMPS pair
+  /// styles as the trailing ``nprocs_`` argument of the comm-aware
+  /// constructor (sourced from ``comm->nprocs``); defaults to 1 for
+  /// direct C++ callers and for the lightweight constructors that
+  /// don't carry comm metadata.  Use this — NOT ``nswap > 0`` — as the
   /// "is multi-rank?" predicate: ``atom_style spin`` and some other
   /// LAMMPS configurations populate ``nswap`` even in single-rank.
   int nprocs = 1;
@@ -65,7 +67,11 @@ struct InputNlist {
         sendproc(nullptr),
         recvproc(nullptr),
         world(0) {};
-  InputNlist(int inum_, int* ilist_, int* numneigh_, int** firstneigh_)
+  InputNlist(int inum_,
+             int* ilist_,
+             int* numneigh_,
+             int** firstneigh_,
+             int nprocs_ = 1)
       : inum(inum_),
         ilist(ilist_),
         numneigh(numneigh_),
@@ -77,7 +83,8 @@ struct InputNlist {
         sendlist(nullptr),
         sendproc(nullptr),
         recvproc(nullptr),
-        world(0) {};
+        world(0),
+        nprocs(nprocs_) {};
   InputNlist(int inum_,
              int* ilist_,
              int* numneigh_,
@@ -89,7 +96,8 @@ struct InputNlist {
              int** sendlist,
              int* sendproc,
              int* recvproc,
-             void* world)
+             void* world,
+             int nprocs_ = 1)
       : inum(inum_),
         ilist(ilist_),
         numneigh(numneigh_),
@@ -101,7 +109,8 @@ struct InputNlist {
         sendlist(sendlist),
         sendproc(sendproc),
         recvproc(recvproc),
-        world(world) {};
+        world(world),
+        nprocs(nprocs_) {};
   ~InputNlist() {};
   /**
    * @brief Set mask for this neighbor list.
@@ -111,15 +120,6 @@ struct InputNlist {
    * @brief Set mapping for this neighbor list.
    */
   void set_mapping(int* mapping_) { mapping = mapping_; };
-  /**
-   * @brief Set the MPI rank count for this neighbor list.
-   *
-   * Used by ``DeepPotPTExpt`` / ``DeepSpinPTExpt`` to decide whether
-   * the regular or with-comm artifact should run.  Pair styles must
-   * call this with ``comm->nprocs``; without it the C++ side will
-   * treat the call as single-rank.
-   */
-  void set_nprocs(int nprocs_) { nprocs = nprocs_; };
 };
 
 /**
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index 632fb630d5..e31a71d519 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -505,9 +505,8 @@ void FixDPLR::pre_force(int vflag) {
   // get lammps nlist
   NeighList* list = pair_deepmd->list;
   deepmd_compat::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
-                                     list->firstneigh);
+                                     list->firstneigh, comm->nprocs);
   lmp_list.set_mask(NEIGHMASK);
-  lmp_list.set_nprocs(comm->nprocs);
   if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
     lmp_list.set_mapping(mapping_vec.data());
   }
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index da4f013f72..12b2b5a538 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -237,9 +237,8 @@ void PairDeepMD::compute(int eflag, int vflag) {
         list->inum, list->ilist, list->numneigh, list->firstneigh,
         commdata_->nswap, commdata_->sendnum, commdata_->recvnum,
         commdata_->firstrecv, commdata_->sendlist, commdata_->sendproc,
-        commdata_->recvproc, &world);
+        commdata_->recvproc, &world, comm->nprocs);
     lmp_list.set_mask(NEIGHMASK);
-    lmp_list.set_nprocs(comm->nprocs);
     if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
       lmp_list.set_mapping(mapping_vec.data());
     }
diff --git a/source/lmp/pair_deepspin.cpp b/source/lmp/pair_deepspin.cpp
index 8ec9edc997..30ca48576f 100644
--- a/source/lmp/pair_deepspin.cpp
+++ b/source/lmp/pair_deepspin.cpp
@@ -251,9 +251,8 @@ void PairDeepSpin::compute(int eflag, int vflag) {
         list->inum, list->ilist, list->numneigh, list->firstneigh,
         commdata_->nswap, commdata_->sendnum, commdata_->recvnum,
         commdata_->firstrecv, commdata_->sendlist, commdata_->sendproc,
-        commdata_->recvproc, &world);
+        commdata_->recvproc, &world, comm->nprocs);
     lmp_list.set_mask(NEIGHMASK);
-    lmp_list.set_nprocs(comm->nprocs);
     if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
       lmp_list.set_mapping(mapping_vec.data());
     }

From 28e41b04abb674e4b29b979808dde5ee94b5a3dd Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 14:01:34 +0800
Subject: [PATCH 7/8] test(lmp): add virial parity assertion in spin
 no-atom-map MPI test

Reviewer ask (njzjz-bot / coderabbitai on PR #5450): the
``test_pair_deepspin_mpi_no_atom_map`` parity check compared energy,
forces, and force_mag against the with-atom-map multi-rank baseline
but skipped virials.  Add the matching ``virials`` assertion so a
regression in the spin with-comm path that affects only the virial
tensor cannot pass silently.  Other spin MPI parity tests in this
file already assert virials at the same tolerance.
---
 source/lmp/tests/test_lammps_spin_dpa3_pt2.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/source/lmp/tests/test_lammps_spin_dpa3_pt2.py b/source/lmp/tests/test_lammps_spin_dpa3_pt2.py
index 5e6a5fbfb5..5429fbb516 100644
--- a/source/lmp/tests/test_lammps_spin_dpa3_pt2.py
+++ b/source/lmp/tests/test_lammps_spin_dpa3_pt2.py
@@ -474,3 +474,6 @@ def test_pair_deepspin_mpi_no_atom_map() -> None:
     np.testing.assert_allclose(
         out_no_map["force_mag"], out_baseline["force_mag"], atol=1e-8, rtol=0
     )
+    np.testing.assert_allclose(
+        out_no_map["virials"], out_baseline["virials"], atol=1e-8, rtol=0
+    )

From d4a9e5dcbe335f56d193c8e04d051f0d526a0a58 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 May 2026 14:27:02 +0800
Subject: [PATCH 8/8] refactor(nlist): drop nprocs param from 4-arg ctor;
 mirror matrix comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lightweight 4-arg ``InputNlist`` (and matching ``DP_NewNlist`` /
hpp wrapper) takes neither ``world`` nor any swap arrays, so it cannot
drive the with-comm dispatch path even when constructed in a multi-rank
LAMMPS context.  The earlier addition of an ``int nprocs_ = 1``
parameter was therefore write-only — passing ``comm->nprocs`` to it
(as ``fix_dplr.cpp`` did) had no observable effect on dispatch.  Drop
the parameter to restore the original ABI of the 4-arg form and make
the contract honest: this overload is always single-rank-classified.
Multi-rank GNN dispatch goes through the 12-arg comm-aware constructor.

The ``nprocs`` data member stays (default 1), settable only via the
comm-aware constructor's trailing ``nprocs_`` argument.

Also mirror the decision-matrix comment from ``DeepPotPTExpt.cc`` into
``DeepSpinPTExpt.cc`` so the spin path documents the four-cell rationale
in-line rather than redirecting the reader to the non-spin file.
---
 source/api_c/include/c_api.h        |  6 ++++--
 source/api_c/include/deepmd.hpp     |  9 ++++-----
 source/api_c/src/c_api.cc           | 10 ++++++----
 source/api_cc/src/DeepSpinPTExpt.cc |  9 ++++++++-
 source/lib/include/neighbor_list.h  | 24 ++++++++++--------------
 source/lmp/fix_dplr.cpp             |  2 +-
 6 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index 15b79a34ae..358480b0ad 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -27,8 +27,10 @@ typedef struct DP_Nlist DP_Nlist;
  * @param[in] Array stores the core region atom's neighbor index
  * @returns A pointer to the neighbor list.
  **/
-extern DP_Nlist* DP_NewNlist(
-    int inum_, int* ilist_, int* numneigh_, int** firstneigh_, int nprocs);
+extern DP_Nlist* DP_NewNlist(int inum_,
+                             int* ilist_,
+                             int* numneigh_,
+                             int** firstneigh_);
 /**
  * @brief Create a new neighbor list with communication capabilities.
  * @details This function extends DP_NewNlist by adding support for parallel
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index b07d955ea0..c3ca40b75f 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -809,16 +809,15 @@ struct InputNlist {
         ilist(nullptr),
         numneigh(nullptr),
         firstneigh(nullptr),
-        nl(DP_NewNlist(0, nullptr, nullptr, nullptr, 1)) {
+        nl(DP_NewNlist(0, nullptr, nullptr, nullptr)) {
     DP_CHECK_OK(DP_NlistCheckOK, nl);
   };
-  InputNlist(
-      int inum_, int* ilist_, int* numneigh_, int** firstneigh_, int nprocs = 1)
+  InputNlist(int inum_, int* ilist_, int* numneigh_, int** firstneigh_)
       : inum(inum_),
         ilist(ilist_),
         numneigh(numneigh_),
         firstneigh(firstneigh_),
-        nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_, nprocs)) {
+        nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_)) {
     DP_CHECK_OK(DP_NlistCheckOK, nl);
   };
   InputNlist(int inum_,
@@ -898,7 +897,7 @@ void inline convert_nlist(InputNlist& to_nlist,
   // delete the original nl
   DP_DeleteNlist(to_nlist.nl);
   to_nlist.nl = DP_NewNlist(to_nlist.inum, to_nlist.ilist, to_nlist.numneigh,
-                            to_nlist.firstneigh, 1);
+                            to_nlist.firstneigh);
 }
 /**
  * @brief Deep Potential Base Model.
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index df1959a013..b0e789648e 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -16,10 +16,12 @@ extern "C" {
 DP_Nlist::DP_Nlist() {}
 DP_Nlist::DP_Nlist(deepmd::InputNlist& nl) : nl(nl) {}
 
-DP_Nlist* DP_NewNlist(
-    int inum_, int* ilist_, int* numneigh_, int** firstneigh_, int nprocs) {
-  DP_NEW_OK(DP_Nlist, deepmd::InputNlist nl(inum_, ilist_, numneigh_,
-                                            firstneigh_, nprocs);
+DP_Nlist* DP_NewNlist(int inum_,
+                      int* ilist_,
+                      int* numneigh_,
+                      int** firstneigh_) {
+  DP_NEW_OK(DP_Nlist,
+            deepmd::InputNlist nl(inum_, ilist_, numneigh_, firstneigh_);
             DP_Nlist* new_nl = new DP_Nlist(nl); return new_nl;)
 }
 DP_Nlist* DP_NewNlist_comm(int inum_,
diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc
index 4cd7319ff2..dac87369d9 100644
--- a/source/api_cc/src/DeepSpinPTExpt.cc
+++ b/source/api_cc/src/DeepSpinPTExpt.cc
@@ -389,7 +389,14 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener,
   bool multi_rank = (lmp_list.nprocs > 1);
   bool atom_map_present = (lmp_list.mapping != nullptr);
   bool use_with_comm = has_comm_artifact_ && multi_rank;
-  // See DeepPotPTExpt::compute_inner for the decision-matrix rationale.
+  // Decision matrix (see PR #5450 description):
+  //   non-GNN model (has_message_passing_ == false): regular path is
+  //                                                  always safe.
+  //   nghost == 0 (NoPbc, isolated cluster):         always safe.
+  //   GNN model, multi-rank:    requires has_comm_artifact_  (cell C-mr / D-mr)
+  //                             else fail-fast               (cell B-mr)
+  //   GNN model, single-rank:   requires atom_map_present    (cell A / C)
+  //                             else fail-fast               (cell B / D)
   if (has_message_passing_ && nghost > 0) {
     if (multi_rank && !has_comm_artifact_) {
       throw deepmd::deepmd_exception(
diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h
index 41a5bbcc12..39682bcd9a 100644
--- a/source/lib/include/neighbor_list.h
+++ b/source/lib/include/neighbor_list.h
@@ -46,13 +46,14 @@ struct InputNlist {
   int mask = 0xFFFFFFFF;
   /// mapping from all atoms to real atoms, in the size of nall
   int* mapping = nullptr;
-  /// number of MPI ranks (1 = single-rank).  Passed by LAMMPS pair
-  /// styles as the trailing ``nprocs_`` argument of the comm-aware
-  /// constructor (sourced from ``comm->nprocs``); defaults to 1 for
-  /// direct C++ callers and for the lightweight constructors that
-  /// don't carry comm metadata.  Use this — NOT ``nswap > 0`` — as the
-  /// "is multi-rank?" predicate: ``atom_style spin`` and some other
-  /// LAMMPS configurations populate ``nswap`` even in single-rank.
+  /// number of MPI ranks (1 = single-rank).  Settable only via the
+  /// trailing ``nprocs_`` argument of the comm-aware constructor (LAMMPS
+  /// pair styles pass ``comm->nprocs``).  The lightweight constructors
+  /// leave it at 1 by construction — they carry no comm metadata
+  /// (``world``, ``sendlist``, ...), so they cannot drive the with-comm
+  /// dispatch path even if a non-1 value were forced here.  Use this —
+  /// NOT ``nswap > 0`` — as the "is multi-rank?" predicate: ``atom_style
+  /// spin`` populates ``nswap`` even in single-rank.
   int nprocs = 1;
   InputNlist()
       : inum(0),
@@ -67,11 +68,7 @@ struct InputNlist {
         sendproc(nullptr),
         recvproc(nullptr),
         world(0) {};
-  InputNlist(int inum_,
-             int* ilist_,
-             int* numneigh_,
-             int** firstneigh_,
-             int nprocs_ = 1)
+  InputNlist(int inum_, int* ilist_, int* numneigh_, int** firstneigh_)
       : inum(inum_),
         ilist(ilist_),
         numneigh(numneigh_),
@@ -83,8 +80,7 @@ struct InputNlist {
         sendlist(nullptr),
         sendproc(nullptr),
         recvproc(nullptr),
-        world(0),
-        nprocs(nprocs_) {};
+        world(0) {};
   InputNlist(int inum_,
              int* ilist_,
              int* numneigh_,
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index e31a71d519..e595e1624b 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -505,7 +505,7 @@ void FixDPLR::pre_force(int vflag) {
   // get lammps nlist
   NeighList* list = pair_deepmd->list;
   deepmd_compat::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
-                                     list->firstneigh, comm->nprocs);
+                                     list->firstneigh);
   lmp_list.set_mask(NEIGHMASK);
   if (comm->nprocs == 1 && atom->map_style != Atom::MAP_NONE) {
     lmp_list.set_mapping(mapping_vec.data());