Skip to content

Commit 8dc6f36

Browse files
Arm backend: Raise quantization testing thresholds (pytorch#17617)
Raises the Frobenius norm and cosine similarity thresholds for comparison ops and a8w4 ops. When quantized, these types of ops can always produce large diffs from the original model for random inputs, and they have been causing flaky test failures. Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
1 parent f5e8123 commit 8dc6f36

10 files changed

Lines changed: 28 additions & 15 deletions

File tree

backends/arm/test/ops/test_conv2d.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ def test_convolution_2d_tosa_INT_a8w4(test_data):
510510
aten_op,
511511
exir_op,
512512
tosa_extensions=["int4"],
513-
frobenius_threshold=0.3,
513+
frobenius_threshold=0.4,
514514
)
515515
pipeline.quantizer.set_global(
516516
get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)

backends/arm/test/ops/test_conv3d.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ def test_convolution_3d_tosa_INT_a8w4(test_data):
531531
exir_op,
532532
tosa_extensions=["int4"],
533533
qtol=1,
534-
frobenius_threshold=0.2,
534+
frobenius_threshold=0.4,
535535
)
536536
pipeline.quantizer.set_global(
537537
get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)

backends/arm/test/ops/test_depthwise_conv.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ def test_convolution_2d_tosa_INT_a8w4_depthwise(test_data):
279279
aten_op=[],
280280
exir_op=exir_op,
281281
tosa_extensions=["int4"],
282+
frobenius_threshold=0.4,
282283
)
283284
pipeline.quantizer.set_global(
284285
get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)

backends/arm/test/ops/test_eq.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ def test_eq_scalar_tosa_INT_tensor(test_module):
146146
test_module().get_inputs(),
147147
Equal.aten_op_Tensor,
148148
Equal.exir_op,
149-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
149+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
150+
cosine_threshold=0.8,
150151
)
151152
pipeline.run()
152153

@@ -158,7 +159,8 @@ def test_eq_scalar_tosa_INT(test_module):
158159
test_module().get_inputs(),
159160
Equal.aten_op_Tensor,
160161
Equal.exir_op,
161-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
162+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
163+
cosine_threshold=0.8,
162164
)
163165
pipeline.run()
164166

backends/arm/test/ops/test_ge.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ def test_ge_tensor_tosa_INT(test_module):
148148
test_module().get_inputs(),
149149
GreaterEqual.aten_op_tensor,
150150
GreaterEqual.exir_op,
151-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
151+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
152+
cosine_threshold=0.8,
152153
)
153154
pipeline.run()
154155

@@ -160,7 +161,8 @@ def test_ge_scalar_tosa_INT(test_module):
160161
test_module().get_inputs(),
161162
GreaterEqual.aten_op_tensor,
162163
GreaterEqual.exir_op,
163-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
164+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
165+
cosine_threshold=0.8,
164166
)
165167
pipeline.run()
166168

backends/arm/test/ops/test_gt.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,8 @@ def test_gt_tensor_tosa_INT(test_module):
149149
test_module().get_inputs(),
150150
Greater.aten_op_tensor,
151151
Greater.exir_op,
152-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
152+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
153+
cosine_threshold=0.8,
153154
)
154155
pipeline.run()
155156

@@ -161,7 +162,8 @@ def test_gt_scalar_tosa_INT(test_module):
161162
test_module().get_inputs(),
162163
Greater.aten_op_tensor,
163164
Greater.exir_op,
164-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
165+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
166+
cosine_threshold=0.8,
165167
)
166168
pipeline.run()
167169

backends/arm/test/ops/test_le.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ def test_le_tensor_tosa_INT(test_module):
125125
test_module().get_inputs(),
126126
LessEqual.aten_op_tensor,
127127
LessEqual.exir_op,
128-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
128+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
129+
cosine_threshold=0.8,
129130
)
130131
pipeline.run()
131132

@@ -137,7 +138,8 @@ def test_le_scalar_tosa_INT(test_module):
137138
test_module().get_inputs(),
138139
LessEqual.aten_op_tensor,
139140
LessEqual.exir_op,
140-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
141+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
142+
cosine_threshold=0.8,
141143
)
142144
pipeline.run()
143145

backends/arm/test/ops/test_linear.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def test_linear_tosa_INT_a8w4(test_data: torch.Tensor):
217217
(test_data,),
218218
aten_op,
219219
tosa_extensions=["int4"],
220-
frobenius_threshold=0.15,
220+
frobenius_threshold=0.4,
221221
)
222222
pipeline.quantizer.set_global(
223223
get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)

backends/arm/test/ops/test_lt.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ def test_lt_tensor_tosa_INT(test_module):
125125
test_module().get_inputs(),
126126
LessThan.aten_op_tensor,
127127
LessThan.exir_op,
128-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
128+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
129+
cosine_threshold=0.8,
129130
)
130131
pipeline.run()
131132

@@ -137,7 +138,8 @@ def test_lt_scalar_tosa_INT(test_module):
137138
test_module().get_inputs(),
138139
LessThan.aten_op_tensor,
139140
LessThan.exir_op,
140-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
141+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
142+
cosine_threshold=0.8,
141143
)
142144
pipeline.run()
143145

backends/arm/test/ops/test_ne.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ def test_ne_tensor_tosa_INT(test_module):
111111
test_module.get_inputs(),
112112
NotEqual.decomposed_ops,
113113
NotEqual.exir_op,
114-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
114+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
115+
cosine_threshold=0.8,
115116
)
116117
pipeline.run()
117118

@@ -123,7 +124,8 @@ def test_ne_scalar_tosa_INT(test_module):
123124
test_module.get_inputs(),
124125
NotEqual.decomposed_ops,
125126
NotEqual.exir_op,
126-
frobenius_threshold=0.5, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
127+
frobenius_threshold=0.6, # Quantized comparisons with small diffs can be inaccurate, leading to large errors in unlucky cases.
128+
cosine_threshold=0.8,
127129
)
128130
pipeline.run()
129131

0 commit comments

Comments
 (0)