Swapping the order in which the arrays are defined, as in this updated patch, fixes the stack smashing with the stack protector enabled. That is ideal, so if you haven't uploaded the previous patch yet, prefer this one. It may be a bug in GCC[1].
Hopefully that will help on aarch64; otherwise, let's just try Clang. The test run is in [2]. I'll run it against aarch64 too once Launchpad finally decides to give it a builder.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115965
[2] https://autopkgtest.ubuntu.com/results/autopkgtest-oracular-nteodosio-rebuilds/oracular/amd64/h/highway/20240717_113010_6b611@/log.gz
--- highway-1.2.0/debian/tests/compile-and-execute 1970-01-01 01:00:00.000000000 +0100
+++ highway-1.2.0/debian/tests/compile-and-execute 2024-07-16 13:00:51.000000000 +0200
@@ -0,0 +1,42 @@
+#!/bin/sh
+set -ex
+trap 'rm -f o' 0
+cflags=$(pkg-config --cflags --libs libhwy)
+# The arrays below are neither padded to a whole number of vectors nor
+# vector-aligned, so the loop uses LoadN/StoreN, which touch only the lanes
+# that exist. Full-vector Load/Store would access memory past the arrays.
+c++ -Wall -Wextra -Wpedantic $cflags -x c++ - -o o <<EOF
+#include <hwy/highway.h>
+#include <stdio.h>
+namespace hn = hwy::HWY_NAMESPACE;
+using T = int;
+void MulAddLoop(const T* HWY_RESTRICT mul_array,
+                const T* HWY_RESTRICT add_array,
+                const size_t size, T* HWY_RESTRICT x_array) {
+  const hn::ScalableTag<T> d;
+  for (size_t i = 0; i < size; i += hn::Lanes(d)) {
+    const size_t n = size - i;
+    const auto mul = hn::LoadN(d, mul_array + i, n);
+    const auto add = hn::LoadN(d, add_array + i, n);
+    auto x = hn::LoadN(d, x_array + i, n);
+    x = hn::MulAdd(mul, x, add);
+    hn::StoreN(x, d, x_array + i, n);
+  }
+}
+int main(){
+  T c[]={-1,-1,-1,-1,-1};
+  const T a[]={1,2,3,4,5},b[]={6,7,9,0,0},expect[]={5,5,6,-4,-5};
+  size_t size=sizeof(c)/sizeof(c[0]);
+  MulAddLoop(a,b,size,c);
+  for(size_t i=0;i<size;i++){
+    if (c[i]!=expect[i]){
+      for(size_t j=0;j<size;j++){
+        printf("%zuth element is %d, expected %d.\n",j,c[j],expect[j]);
+      }
+      return 52;
+    }
+  }
+  return 0;
+}
+EOF
+./o
--- highway-1.2.0/debian/tests/control 1970-01-01 01:00:00.000000000 +0100
+++ highway-1.2.0/debian/tests/control 2024-07-16 13:00:51.000000000 +0200
@@ -0,0 +1,3 @@
+Tests: compile-and-execute
+Depends: libhwy-dev, g++, pkgconf
+Restrictions: allow-stderr