Source: onnxruntime
Version: 1.21.0+dfsg-1
Severity: normal
Tags: FTBFS patch
User: debian-loonga...@lists.debian.org
Usertags: loong64

Dear maintainers,

Compiling onnxruntime failed on loong64 in the Debian Package Auto-Building environment.
The error log is as follows:
```
/<<PKGBUILDDIR>>/onnxruntime/core/mlas/lib/quantize.cpp:1707:88: error: dereferencing type-punned pointer will break strict-aliasing rules [-Werror=strict-aliasing]
 1707 |     const __m128 MinimumValueVector = MlasReinterpretAsFloat32x4(__lsx_vreplgr2vr_w( *((uint32_t*)&min_f)));
      |                                                                                       ~^~~~~~~~~~~~~~~~~~
/<<PKGBUILDDIR>>/onnxruntime/core/mlas/lib/quantize.cpp:1708:88: error: dereferencing type-punned pointer will break strict-aliasing rules [-Werror=strict-aliasing]
 1708 |     const __m128 MaximumValueVector = MlasReinterpretAsFloat32x4(__lsx_vreplgr2vr_w( *((uint32_t*)&max_f)));
      |                                                                                       ~^~~~~~~~~~~~~~~~~~
```
The full build log can be found at https://buildd.debian.org/status/fetch.php?pkg=onnxruntime&arch=loong64&ver=1.21.0%2Bdfsg-1&stamp=1742453939&raw=0.

We need to cherry-pick the upstream patch to fix the error triggered by "-Werror=strict-aliasing"; details can be found at https://github.com/microsoft/onnxruntime/pull/24578.
The upstream release, v1.22, does not include this patch.
Please consider applying the attached patch.

With the attached patch applied, onnxruntime (1.21.0+dfsg-1+loong64) was built successfully in a local environment.
```
   dh_builddeb -O--buildsystem=cmake\+ninja -O--sourcedirectory=cmake -O--builddirectory=.
dpkg-deb: building package 'libonnxruntime1.21-dbgsym' in '../libonnxruntime1.21-dbgsym_1.21.0+dfsg-1+loong64_loong64.deb'.
dpkg-deb: building package 'libonnxruntime1.21' in '../libonnxruntime1.21_1.21.0+dfsg-1+loong64_loong64.deb'.
dpkg-deb: building package 'libonnxruntime-dev' in '../libonnxruntime-dev_1.21.0+dfsg-1+loong64_loong64.deb'.
dpkg-deb: building package 'onnxruntime-tools' in '../onnxruntime-tools_1.21.0+dfsg-1+loong64_all.deb'.
dpkg-deb: building package 'python3-onnxruntime' in '../python3-onnxruntime_1.21.0+dfsg-1+loong64_loong64.deb'.
dpkg-deb: building package 'python3-onnxruntime-dbgsym' in '../python3-onnxruntime-dbgsym_1.21.0+dfsg-1+loong64_loong64.deb'.
 dpkg-genbuildinfo -O../onnxruntime_1.21.0+dfsg-1+loong64_loong64.buildinfo
 dpkg-genchanges -O../onnxruntime_1.21.0+dfsg-1+loong64_loong64.changes
```

Best Regards,
Dandan Zhang

Description:  Fix error caused by "-Werror=strict-aliasing".
 .
 onnxruntime (1.21.0+dfsg-1+loong64) unstable; urgency=medium
 .
   * Cherry-pick upstream patch.
     - Fix warning and fix transpose store op for LoongArch.
Author: Dandan Zhang <zhangdan...@loongson.cn>

---
Applied-Upstream: master, https://github.com/microsoft/onnxruntime/pull/24578
Last-Update: 2025-05-15

--- onnxruntime-1.21.0+dfsg.orig/onnxruntime/core/mlas/lib/quantize.cpp
+++ onnxruntime-1.21.0+dfsg/onnxruntime/core/mlas/lib/quantize.cpp
@@ -1704,8 +1704,8 @@ MlasRequantizeOutput(
     float min_f = float(std::numeric_limits<OutputType>::lowest() - ZeroPoint);
     float max_f = float(std::numeric_limits<OutputType>::max() - ZeroPoint);
     const __m128 PerMatrixScaleVector = PerColumnScale ? MlasReinterpretAsFloat32x4(__lsx_vldi(0)) : MlasReinterpretAsFloat32x4(__lsx_vldrepl_w(Scale, 0));
-    const __m128 MinimumValueVector = MlasReinterpretAsFloat32x4(__lsx_vreplgr2vr_w( *((uint32_t*)&min_f)));
-    const __m128 MaximumValueVector = MlasReinterpretAsFloat32x4(__lsx_vreplgr2vr_w( *((uint32_t*)&max_f)));
+    const __m128 MinimumValueVector = MlasReinterpretAsFloat32x4((__m128i)(v4f32){min_f,min_f,min_f,min_f});
+    const __m128 MaximumValueVector = MlasReinterpretAsFloat32x4((__m128i)(v4f32){max_f,max_f,max_f,max_f});
     const __m128i ZeroPointVector = __lsx_vreplgr2vr_w(ZeroPoint);
 
     if (nullptr != Bias) {
--- onnxruntime-1.21.0+dfsg.orig/onnxruntime/core/mlas/lib/transpose.cpp
+++ onnxruntime-1.21.0+dfsg/onnxruntime/core/mlas/lib/transpose.cpp
@@ -470,20 +470,20 @@ MlasTranspose8x8Block(
     __m128i c3 = __lsx_vilvh_h(b3, b2);
 
     __m128 d0 = (__m128)(__lsx_vilvl_w(c2, c0));
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 0], 0), __lsx_vpickve2gr_d(d0, 0), 0), (__m128i *)&Output[OutputStride * 0], 0);
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 1], 0), __lsx_vpickve2gr_d(d0, 1), 0), (__m128i *)&Output[OutputStride * 1], 0);
+    __lsx_vstelm_d(d0, &Output[OutputStride * 0], 0, 0);
+    __lsx_vstelm_d(d0, &Output[OutputStride * 1], 0, 1);
 
     __m128 d1 = (__m128)(__lsx_vilvh_w(c2, c0));
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 2], 0), __lsx_vpickve2gr_d(d1, 0), 0), (__m128i *)&Output[OutputStride * 2], 0);
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 3], 0), __lsx_vpickve2gr_d(d1, 1), 0), (__m128i *)&Output[OutputStride * 3], 0);
+    __lsx_vstelm_d(d1, &Output[OutputStride * 2], 0, 0);
+    __lsx_vstelm_d(d1, &Output[OutputStride * 3], 0, 1);
 
     __m128 d2 = (__m128)(__lsx_vilvl_w(c3, c1));
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 4], 0), __lsx_vpickve2gr_d(d2, 0), 0), (__m128i *)&Output[OutputStride * 4], 0);
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 5], 0), __lsx_vpickve2gr_d(d2, 1), 0), (__m128i *)&Output[OutputStride * 5], 0);
+    __lsx_vstelm_d(d2, &Output[OutputStride * 4], 0, 0);
+    __lsx_vstelm_d(d2, &Output[OutputStride * 5], 0, 1);
 
     __m128 d3 = (__m128)(__lsx_vilvh_w(c3, c1));
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 6], 0), __lsx_vpickve2gr_d(d3, 0), 0), (__m128i *)&Output[OutputStride * 6], 0);
-    __lsx_vst(__lsx_vinsgr2vr_d(__lsx_vld((__m128i *)&Output[OutputStride * 7], 0), __lsx_vpickve2gr_d(d3, 1), 0), (__m128i *)&Output[OutputStride * 7], 0);
+    __lsx_vstelm_d(d3, &Output[OutputStride * 6], 0, 0);
+    __lsx_vstelm_d(d3, &Output[OutputStride * 7], 0, 1);
 }
 
 #endif

Reply via email to