Skip to content

Commit fc0f06b

Browse files
committed
NXP Backend: Enable prefetching weights from external memory to SRAM in AoT and runtime
1 parent 8dc6f36 commit fc0f06b

6 files changed

Lines changed: 109 additions & 11 deletions

File tree

backends/nxp/backend/neutron_converter_manager.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,19 @@ def verify_target(self, target: str):
7272
f"Target `{target}` is not a valid target. Must be one of `{valid_targets}`."
7373
)
7474

75-
def convert(self, tflite_model: bytes, target: str) -> bytes:
75+
def convert(
76+
self, tflite_model: bytes, target: str, fetch_constants_to_sram: bool = False
77+
) -> bytes:
78+
"""
79+
Call Neutron Converter.
80+
81+
:param tflite_model: A generic TFLite model to be converted.
82+
:param target: The target platform.
83+
:param fetch_constants_to_sram: If True, add microcode that prefetches weights from external memory into SRAM.
84+
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
85+
86+
:return: TFLite model with Neutron microcode as bytes.
87+
"""
7688
# Neutron converter crashes if we provide invalid target -> verify.
7789
self.verify_target(target)
7890

@@ -82,6 +94,7 @@ def convert(self, tflite_model: bytes, target: str) -> bytes:
8294
cctx.compilationOpts.excludeGraphPasses = (
8395
"HoistSliceAboveTranspose,MergeTranspose"
8496
)
97+
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram
8598

8699
# Try to use multiprocessing for isolation, but fall back to direct execution
87100
# if the environment doesn't support it (e.g., in sandcastle/build environments)

backends/nxp/nxp_backend.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def __init__(self):
4646
self.operators_not_to_delegate: List[str] = []
4747
self.neutron_converter_flavor = None
4848
self.use_neutron_for_format_conversion = True
49+
self.fetch_constants_to_sram = False
4950

5051
def _replace_colons(self, operator: str) -> str:
5152
"""
@@ -60,6 +61,7 @@ def neutron_compile_spec(
6061
extra_flags: Optional[str] = None,
6162
operators_not_to_delegate: Optional[List[str]] = None,
6263
use_neutron_for_format_conversion: bool = True,
64+
fetch_constants_to_sram: bool = False,
6365
):
6466
"""
6567
Generate compile spec for Neutron NPU
@@ -73,6 +75,8 @@ def neutron_compile_spec(
7375
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
7476
ensure that the IO matches the executorch partition, which will be
7577
delegated to Neutron.
78+
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
79+
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
7680
"""
7781

7882
self.neutron_converter_flavor = neutron_converter_flavor
@@ -94,6 +98,8 @@ def neutron_compile_spec(
9498

9599
self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
96100

101+
self.fetch_constants_to_sram = fetch_constants_to_sram
102+
97103
return self
98104

99105
def build(self):
@@ -116,6 +122,10 @@ def build(self):
116122
"use_neutron_for_format_conversion",
117123
f"{self.use_neutron_for_format_conversion}".encode(),
118124
),
125+
CompileSpec(
126+
"fetch_constants_to_sram",
127+
f"{self.fetch_constants_to_sram}".encode(),
128+
),
119129
]
120130

121131
return self.compile_spec
@@ -128,6 +138,7 @@ def generate_neutron_compile_spec(
128138
extra_flags: Optional[str] = None,
129139
operators_not_to_delegate: Optional[List[str]] = None,
130140
use_neutron_for_format_conversion: bool = True,
141+
fetch_constants_to_sram: bool = False,
131142
) -> List[CompileSpec]:
132143
return (
133144
NeutronCompileSpecBuilder()
@@ -137,6 +148,7 @@ def generate_neutron_compile_spec(
137148
extra_flags=extra_flags,
138149
operators_not_to_delegate=operators_not_to_delegate,
139150
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
151+
fetch_constants_to_sram=fetch_constants_to_sram,
140152
)
141153
.build()
142154
)
@@ -160,6 +172,7 @@ def preprocess( # noqa C901
160172
target = ""
161173
neutron_converter_flavor = ""
162174
use_neutron_for_format_conversion = None
175+
fetch_constants_to_sram = False
163176
for spec in compile_spec:
164177
if spec.key == "output_format":
165178
output_format = spec.value.decode()
@@ -171,6 +184,8 @@ def preprocess( # noqa C901
171184
neutron_converter_flavor = spec.value.decode()
172185
if spec.key == "use_neutron_for_format_conversion":
173186
use_neutron_for_format_conversion = spec.value.decode() == "True"
187+
if spec.key == "fetch_constants_to_sram":
188+
fetch_constants_to_sram = spec.value.decode() == "True"
174189

175190
# Check that the output format is set in the compile spec
176191
if not output_format:
@@ -212,7 +227,7 @@ def preprocess( # noqa C901
212227
)
213228

214229
neutron_model = NeutronConverterManager(neutron_converter_flavor).convert(
215-
tflite_model, target
230+
tflite_model, target, fetch_constants_to_sram
216231
)
217232

218233
# Dump the tflite file if logging level is enabled

backends/nxp/runtime/NeutronBackend.cpp

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ typedef struct {
7070
int numOutputs = 0;
7171
int numInputArgs = 0;
7272
uint32_t scratchSize = 0;
73+
#ifdef EXTERNAL_MEM
74+
uint32_t sramScratchSize = 0;
75+
#endif
7376
uint32_t profileSize = 0;
7477
uint32_t debugSize = 0;
7578
NeutronModelConfig mcfg;
@@ -79,7 +82,18 @@ typedef struct {
7982
const uint8_t* outputTranspositionFlags;
8083
const uint8_t* inputMap;
8184
const uint8_t* outputMap;
82-
} NeutronConfig;
85+
} NeutronExecutorchConfig;
86+
87+
#ifdef EXTERNAL_MEM
88+
// Neutron compute has no access to FLASH.
89+
// Prefetch weights from FLASH to SRAM using memcpy.
90+
// Used for models converted with --fetch_constants_to_sram.
91+
void copy(void* dst, void* src, uint32_t size, uint32_t channel) {
92+
memcpy(dst, src, size);
93+
}
94+
void wait(uint32_t channel) {}
95+
static NeutronConfig neutronMemCopyConfig = {copy, wait};
96+
#endif
8397

8498
// Applied on outputs.
8599
template <typename T>
@@ -258,7 +272,7 @@ class NeutronBackend final : public PyTorchBackendInterface {
258272
ArrayRef<CompileSpec> compile_specs) const override {
259273
MemoryAllocator* allocator = context.get_runtime_allocator();
260274

261-
auto* cfg = allocator->allocateInstance<NeutronConfig>();
275+
auto* cfg = allocator->allocateInstance<NeutronExecutorchConfig>();
262276

263277
// The following data is read from the "processed" data blob.
264278
// cfg->numInputs
@@ -293,15 +307,22 @@ class NeutronBackend final : public PyTorchBackendInterface {
293307
switch (payloadVersion) {
294308
case 0:
295309
cfg->scratchSize = buffer[9];
310+
#ifdef EXTERNAL_MEM
311+
cfg->sramScratchSize = buffer[10];
312+
#endif
296313
cfg->profileSize = 0;
297314
cfg->debugSize = 0;
298315
cfg->numInputs = buffer[11];
299316
cfg->numOutputs = buffer[12];
300317
break;
301318
case 1:
302319
cfg->scratchSize = buffer[9];
303-
cfg->profileSize = buffer[10];
320+
// The highest bit has a special meaning in NS >= 2.2.3, so mask it off when reading the profile size.
321+
cfg->profileSize = buffer[10] & 0x7FFFFFFF;
304322
cfg->debugSize = buffer[11];
323+
#ifdef EXTERNAL_MEM
324+
cfg->sramScratchSize = buffer[12];
325+
#endif
305326
cfg->numInputs = buffer[13];
306327
cfg->numOutputs = buffer[14];
307328
break;
@@ -351,6 +372,14 @@ class NeutronBackend final : public PyTorchBackendInterface {
351372
return Error::InvalidProgram;
352373
}
353374

375+
#ifdef EXTERNAL_MEM
376+
neutronRC = neutronSetConfig(&neutronMemCopyConfig);
377+
if (neutronRC != ENONE) {
378+
ET_LOG(Error, "Neutron set config failed with error code %ld", neutronRC);
379+
return Error::InvalidProgram;
380+
}
381+
#endif
382+
354383
return cfg;
355384
}
356385

@@ -365,7 +394,8 @@ class NeutronBackend final : public PyTorchBackendInterface {
365394
BackendExecutionContext& context,
366395
DelegateHandle* input_handle,
367396
Span<EValue*> args) const override {
368-
NeutronConfig* cfg = static_cast<NeutronConfig*>(input_handle);
397+
NeutronExecutorchConfig* cfg =
398+
static_cast<NeutronExecutorchConfig*>(input_handle);
369399

370400
// Allocate space for the input and output pointers.
371401
cfg->dcfg.inputs = static_cast<const void**>(
@@ -381,6 +411,12 @@ class NeutronBackend final : public PyTorchBackendInterface {
381411
cfg->dcfg.outputs[cfg->numOutputs + 2] =
382412
static_cast<void*>(context.allocate(cfg->debugSize, 16));
383413

414+
#ifdef EXTERNAL_MEM
415+
// Allocate the space in SRAM to prefetch weights from FLASH.
416+
cfg->dcfg.scratchWeights =
417+
static_cast<void*>(context.allocate(cfg->sramScratchSize, 16));
418+
#endif
419+
384420
// Set inputs from args.
385421
// Transpose inputs if needed.
386422
for (int i = 0; i < cfg->numInputs; i++) {
@@ -527,7 +563,8 @@ class NeutronBackend final : public PyTorchBackendInterface {
527563
}
528564

529565
void destroy(DelegateHandle* handle) const override {
530-
NeutronConfig* cfg = reinterpret_cast<NeutronConfig*>(handle);
566+
NeutronExecutorchConfig* cfg =
567+
reinterpret_cast<NeutronExecutorchConfig*>(handle);
531568

532569
// Unprepare to free resources in neutron driver.
533570
NeutronError neutronRC = neutronModelUnprepare(cfg->nmh);

backends/nxp/tests/executorch_pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def to_quantized_edge_program(
9999
get_quantizer_fn=None,
100100
use_neutron_for_format_conversion=True,
101101
use_quant_state_dict=True,
102+
fetch_constants_to_sram=False,
102103
) -> EdgeProgramManager:
103104
_neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor)
104105
if get_quantizer_fn is None:
@@ -128,6 +129,7 @@ def to_quantized_edge_program(
128129
operators_not_to_delegate=operators_not_to_delegate,
129130
neutron_converter_flavor=neutron_converter_flavor,
130131
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
132+
fetch_constants_to_sram=fetch_constants_to_sram,
131133
)
132134
post_quant_state_dict = (
133135
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None

backends/nxp/tests/test_neutron_converter_manager.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2024-2025 NXP
1+
# Copyright 2024-2026 NXP
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -14,7 +14,8 @@
1414
NeutronConverterManager,
1515
)
1616
from executorch.backends.nxp.backend.node_format_inference import NodeFormatInference
17-
from executorch.backends.nxp.tests.models import Conv2dModule
17+
from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
18+
from executorch.backends.nxp.tests.models import Conv2dModule, LinearModule
1819

1920

2021
def test_conv2d_neutron_conversion__default_flavor():
@@ -31,7 +32,7 @@ def test_conv2d_neutron_conversion__default_flavor():
3132
)
3233

3334
neutron_converter_manager = NeutronConverterManager()
34-
neutron_model = neutron_converter_manager.convert(tflite_model, "imxrt700")
35+
neutron_model = neutron_converter_manager.convert(tflite_model, "imxrt700", False)
3536

3637
assert len(
3738
neutron_model
@@ -52,8 +53,30 @@ def test__conv2d_neutron_conversion__invalid_flavor():
5253
)
5354

5455
with pytest.raises(RuntimeError) as excinfo:
55-
_ = NeutronConverterManager("bad_flavor").convert(tflite_model, "imxrt700")
56+
_ = NeutronConverterManager("bad_flavor").convert(
57+
tflite_model, "imxrt700", False
58+
)
5659

5760
assert "Neutron Converter module with flavor 'bad_flavor' not found." in str(
5861
excinfo
5962
)
63+
64+
65+
def test_conv2d_neutron_conversion__prefetching(mocker):
66+
model = LinearModule(True)
67+
input_shape = (1, 1, 32, 32)
68+
69+
converter_spy = mocker.spy(NeutronConverterManager, "convert")
70+
_ = to_quantized_edge_program(
71+
model, input_shape, fetch_constants_to_sram=True
72+
).exported_program()
73+
neutron_model_prefetch = converter_spy.spy_return
74+
75+
_ = to_quantized_edge_program(
76+
model, input_shape, fetch_constants_to_sram=False
77+
).exported_program()
78+
neutron_model_regular = converter_spy.spy_return
79+
80+
assert len(neutron_model_prefetch) != len(
81+
neutron_model_regular
82+
), "The weight prefetching flag does not make a difference!"

examples/nxp/aot_neutron_compile.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
229229
action="store_true",
230230
help="The calibration and testing datasets will be generated randomly instead of being downloaded.",
231231
)
232+
parser.add_argument(
233+
"--fetch_constants_to_sram",
234+
required=False,
235+
default=False,
236+
action="store_true",
237+
help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.",
238+
)
232239

233240
args = parser.parse_args()
234241

@@ -313,6 +320,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
313320
args.target,
314321
operators_not_to_delegate=args.operators_not_to_delegate,
315322
neutron_converter_flavor=args.neutron_converter_flavor,
323+
fetch_constants_to_sram=args.fetch_constants_to_sram,
316324
)
317325
partitioners = (
318326
[

0 commit comments

Comments
 (0)