Skip to content

Commit ef6ceae

Browse files
authored
NXP backend: added Squeeze support (pytorch#16540)
### Summary Adds support for the "Squeeze" operator. ### Test plan Tests can be run manually using `pytest -c /dev/null backends/nxp/tests/`. cc @robert-kalmar @MartinPavella
1 parent fc0f06b commit ef6ceae

13 files changed

Lines changed: 323 additions & 232 deletions

backends/nxp/aten_passes/convert_unsqueeze_to_view.py

Lines changed: 0 additions & 79 deletions
This file was deleted.

backends/nxp/aten_passes/decompose_split_to_slices_pass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def call(self, graph_module: GraphModule) -> Optional[PassResult]:
187187
self._replace_split_with_slices(input_node, split_node, starts, ends, dim)
188188
made_changes = True
189189

190-
self.graph_module.recompile()
191190
self.graph_module.graph.eliminate_dead_code()
191+
self.graph_module.recompile()
192192

193193
return PassResult(self.graph_module, made_changes)

backends/nxp/aten_passes/neutron_aten_pass_manager.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77

88
import torch
99

10-
from executorch.backends.nxp.aten_passes.convert_unsqueeze_to_view import (
11-
ConvertUnsqueezeToViewPass,
12-
)
1310
from executorch.backends.nxp.aten_passes.decompose_split_to_slices_pass import (
1411
DecomposeSplitToSlicesPass,
1512
)
@@ -50,7 +47,6 @@ def _get_default_passes(neutron_target_spec, qat_mode: bool = False) -> list[Pas
5047
RemoveNodesWithKnownOutputs(),
5148
FuseLinearAndAddPass(),
5249
MoveActivationBeforeConcat(neutron_target_spec),
53-
ConvertUnsqueezeToViewPass(),
5450
]
5551

5652
if not qat_mode:
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Copyright 2026 NXP
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
7+
import torch
8+
9+
from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass
10+
from executorch.exir.dialects._ops import ops as exir_ops
11+
from torch._subclasses import FakeTensor, FakeTensorMode
12+
from torch.fx import GraphModule, Node
13+
from torch.fx.passes.infra.pass_base import PassResult
14+
15+
16+
class ConvertReshapingNodesToViewPass(NeutronEdgePass):
    """Replace reshaping-only edge-dialect nodes with `aten.view_copy.default`.

    The pass rewrites calls to the following edge-dialect targets:
      - `aten.squeeze_copy.default`, `aten.squeeze_copy.dims`, `aten.squeeze_copy.dim`
      - `aten.unsqueeze_copy.default`

          x                                              x
          │                                              │
      aten.[un]squeeze_copy(x, dim)     ──►      aten.view_copy.default(x, S)
          │                                              │
          ▼                                              ▼
         out                                            out

    `S` is the output shape already recorded in the replaced node's
    `meta["val"]`, so the rewrite is shape-preserving by construction.
    """

    # Set by `run()`; the helper methods operate on this module's graph.
    graph_module: GraphModule

    @staticmethod
    def _is_squeeze(node_: Node) -> bool:
        """Return True if `node_` calls any `squeeze_copy` overload (dim/dims/default)."""
        return node_.op == "call_function" and node_.target in (
            exir_ops.edge.aten.squeeze_copy.dim,
            exir_ops.edge.aten.squeeze_copy.dims,
            exir_ops.edge.aten.squeeze_copy.default,
        )

    @staticmethod
    def _is_unsqueeze(node_: Node) -> bool:
        """Return True if `node_` calls `aten.unsqueeze_copy.default`."""
        return (
            node_.op == "call_function"
            and node_.target == exir_ops.edge.aten.unsqueeze_copy.default
        )

    def _create_view_copy_node(self, *view_args) -> Node:
        """Insert an `aten.view_copy.default` node with `view_args` and valid meta.

        `view_args[0]` must be an fx `Node` carrying a fake tensor in
        `meta["val"]`; the remaining args are forwarded to `view_copy`
        (here: the target shape). The new node's `meta["val"]` is computed by
        running the op on a fake tensor so downstream passes see the correct
        output shape and dtype.
        """
        view_target = exir_ops.edge.aten.view_copy.default
        view_node = self.graph_module.graph.call_function(view_target, view_args)

        view_node.meta["source_fn_stack"] = [(view_node.name, view_target)]

        x_val = view_args[0].meta["val"]
        with FakeTensorMode() as mode:
            fake_input = FakeTensor.from_tensor(
                torch.empty(x_val.shape, dtype=x_val.dtype), mode
            )
            # Let the op itself infer the output shape instead of duplicating
            # the view/squeeze shape logic here.
            output_shape = view_target(fake_input, *view_args[1:]).shape
            view_node.meta["val"] = FakeTensor.from_tensor(
                torch.empty(output_shape, dtype=x_val.dtype), mode
            )

        return view_node

    def run(self, graph_module: GraphModule) -> PassResult:
        """Replace the first matching squeeze/unsqueeze node and return.

        At most one node is rewritten per invocation; the parent class calls
        the pass again, which avoids traversing a graph mutated mid-iteration.
        """
        self.graph_module = graph_module

        for node in list(graph_module.graph.nodes):
            if not (self._is_squeeze(node) or self._is_unsqueeze(node)):
                continue

            input_node = node.all_input_nodes[0]
            # The replaced node's recorded output shape becomes the
            # `view_copy` target shape.
            target_shape = node.meta["val"].shape

            with self.graph_module.graph.inserting_after(node):
                view_copy_node = self._create_view_copy_node(input_node, target_shape)

            node.replace_all_uses_with(view_copy_node)
            self.graph_module.graph.erase_node(node)

            self.graph_module.graph.eliminate_dead_code()
            self.graph_module.recompile()

            # Return immediately to avoid traversing a modified graph.
            # The parent class will call this pass again.
            return PassResult(graph_module, True)

        # The graph was not modified.
        return PassResult(graph_module, False)

backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 NXP
1+
# Copyright 2025-2026 NXP
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -105,6 +105,9 @@ class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
105105
ViewCopy,
106106
],
107107
ViewCopy: [Clone, CloneDimOrder],
108+
Conv: [
109+
ViewCopy, # For 1D conv.
110+
],
108111
}
109112

110113
def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
@@ -200,6 +203,7 @@ class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
200203
Relu,
201204
Sigmoid,
202205
Tanh,
206+
ViewCopy, # For 1D conv.
203207
],
204208
ViewCopy: [Clone, CloneDimOrder],
205209
}

backends/nxp/edge_passes/neutron_edge_pass_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
from executorch.backends.nxp.edge_passes.convert_reshaping_nodes_to_view import (
7+
ConvertReshapingNodesToViewPass,
8+
)
69
from executorch.backends.nxp.edge_passes.move_auxiliary_operator_into_separate_qdq_cluster_pass import (
710
MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass,
811
MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass,
@@ -21,6 +24,7 @@ def __init__(self, passes: list[NeutronEdgePass] = None):
2124
MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(),
2225
MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(),
2326
RemoveUselessAsStridedCopyNodes(),
27+
ConvertReshapingNodesToViewPass(),
2428
]
2529

2630
super().__init__(

backends/nxp/quantizer/neutron_quantizer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,14 @@
4848
SigmoidPattern,
4949
SliceTensorPattern,
5050
SoftMaxPattern,
51+
SqueezeDimPattern,
52+
SqueezeDimsPattern,
53+
SqueezePattern,
5154
SubTensorPattern,
5255
TanhInPlacePattern,
5356
TanhPattern,
5457
TransposeIntPattern,
58+
UnsqueezePattern,
5559
UpsampleBilinear2DPattern,
5660
UpsampleNearest2DPattern,
5761
ViewPattern,
@@ -281,10 +285,14 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False)
281285
OpQuantizer(SigmoidPattern(is_qat=is_qat), static_qconfig),
282286
OpQuantizer(SliceTensorPattern(is_qat=is_qat), static_qconfig),
283287
OpQuantizer(SoftMaxPattern(is_qat=is_qat), static_qconfig),
288+
OpQuantizer(SqueezeDimPattern(is_qat=is_qat), static_qconfig),
289+
OpQuantizer(SqueezeDimsPattern(is_qat=is_qat), static_qconfig),
290+
OpQuantizer(SqueezePattern(is_qat=is_qat), static_qconfig),
284291
OpQuantizer(SubTensorPattern(is_qat=is_qat), static_qconfig),
285292
OpQuantizer(TanhPattern(is_qat=is_qat), static_qconfig),
286293
OpQuantizer(TanhInPlacePattern(is_qat=is_qat), static_qconfig),
287294
OpQuantizer(TransposeIntPattern(is_qat=is_qat), static_qconfig),
295+
OpQuantizer(UnsqueezePattern(is_qat=is_qat), static_qconfig),
288296
OpQuantizer(UpsampleBilinear2DPattern(is_qat=is_qat), static_qconfig),
289297
OpQuantizer(UpsampleNearest2DPattern(is_qat=is_qat), static_qconfig),
290298
OpQuantizer(ViewPattern(is_qat=is_qat), static_qconfig),

backends/nxp/quantizer/patterns.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,33 @@ def get_anchors(
972972
)
973973

974974

975+
class SqueezePattern(SharedSpecPattern):
    """Shared-spec quantization pattern for the `aten.squeeze.default` operator."""

    def partition_types(self):
        return [torch.ops.aten.squeeze.default]
982+
983+
984+
class SqueezeDimPattern(SharedSpecPattern):
    """Shared-spec quantization pattern for the `aten.squeeze.dim` operator."""

    def partition_types(self):
        return [torch.ops.aten.squeeze.dim]
991+
992+
993+
class SqueezeDimsPattern(SharedSpecPattern):
    """Shared-spec quantization pattern for the `aten.squeeze.dims` operator."""

    def partition_types(self):
        return [torch.ops.aten.squeeze.dims]
1000+
1001+
9751002
class TanhPattern(QuantizationPattern):
9761003
"""
9771004
Quantizer for Tanh operator.
@@ -1008,6 +1035,13 @@ def get_anchors(
10081035
)
10091036

10101037

1038+
class UnsqueezePattern(SharedSpecPattern):
    """Shared-spec quantization pattern for the `aten.unsqueeze.default` operator."""

    def partition_types(self):
        return [torch.ops.aten.unsqueeze.default]
1043+
1044+
10111045
class UpsampleBilinear2DPattern(SharedSpecPattern):
10121046
"""
10131047
Quantizer for `aten.upsample_bilinear2d.vec` operator.

backends/nxp/tests/models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,3 +788,15 @@ def forward(self, x, divisor):
788788
# partition 2
789789
x = self.prelu(x)
790790
return x
791+
792+
793+
class SqueezeAddModel(torch.nn.Module):
    """Test model: adds its two inputs and squeezes the sum.

    When `dim` is given at construction, only that dimension is squeezed;
    otherwise all size-1 dimensions are removed.
    """

    def __init__(self, dim=None):
        super().__init__()
        self.dim = dim

    def forward(self, x, y):
        total = x + y
        if self.dim is not None:
            return torch.squeeze(total, self.dim)
        return torch.squeeze(total)

backends/nxp/tests/test_batch_norm_fusion.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 NXP
1+
# Copyright 2025-2026 NXP
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -172,9 +172,7 @@ def test_batch_norm_conv_fusing__full_pipeline__1d(bias: bool):
172172
).exported_program()
173173
nodes = list(edge_program.graph.nodes)
174174

175-
assert (
176-
len(nodes) == 17
177-
) # 1D Conv currently isn't delegated, because it doesn't get quantized.
175+
assert len(nodes) == 13
178176
assert not any(
179177
node.op == "call_function" and "batch_norm" in node.target.__name__
180178
for node in nodes

0 commit comments

Comments
 (0)