https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109605
Wojciech Mula <wojciech_mula at poczta dot onet.pl> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |wojciech_mula at poczta dot onet.pl --- Comment #3 from Wojciech Mula <wojciech_mula at poczta dot onet.pl> --- This is somewhat related. I needed to generate one particular procedure without any vector instructions (the surrounding code is free to use RVV instructions). But when the code uses a builtin function such as `memcpy`, GCC still emits some vector instructions. The cure is to pass `-fno-builtin` on the command line, because the `optimize` pragma does not accept that option. The attached sample code comes from the simdutf project (src/scalar/utf.f); a godbolt link for convenience: https://godbolt.org/z/Ya91he99v. ---no-vector.cpp-- #include <cstdlib> #include <cstdint> #include <cstring> #pragma GCC optimize ("no-tree-vectorize") #pragma GCC optimize ("no-tree-loop-vectorize") #pragma GCC optimize ("no-tree-slp-vectorize") #pragma GCC optimize ("no-builtin") // not accepted by the compiler bool validate(const char *buf, size_t len) noexcept { const uint8_t *data = reinterpret_cast<const uint8_t *>(buf); uint64_t pos = 0; uint32_t code_point = 0; while (pos < len) { // check of the next 16 bytes are ascii. 
uint64_t next_pos = pos + 16; if (next_pos <= len) { // if it is safe to read 16 more bytes, check that they are ascii uint64_t v1; std::memcpy(&v1, data + pos, sizeof(uint64_t)); uint64_t v2; std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); uint64_t v{v1 | v2}; if ((v & 0x8080808080808080) == 0) { pos = next_pos; continue; } } unsigned char byte = data[pos]; while (byte < 0b10000000) { if (++pos == len) { return true; } byte = data[pos]; } if ((byte & 0b11100000) == 0b11000000) { next_pos = pos + 2; if (next_pos > len) { return false; } if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } // range check code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); if ((code_point < 0x80) || (0x7ff < code_point)) { return false; } } else if ((byte & 0b11110000) == 0b11100000) { next_pos = pos + 3; if (next_pos > len) { return false; } if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } // range check code_point = (byte & 0b00001111) << 12 | (data[pos + 1] & 0b00111111) << 6 | (data[pos + 2] & 0b00111111); if ((code_point < 0x800) || (0xffff < code_point) || (0xd7ff < code_point && code_point < 0xe000)) { return false; } } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 next_pos = pos + 4; if (next_pos > len) { return false; } if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; } // range check code_point = (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } } else { // we may have a continuation return false; } pos = next_pos; } return true; } ---eof--- The head of generated asm: --- validate(char const*, unsigned long): beq a1,zero,.L32 li 
a4,2139062272 addi a4,a4,-129 slli a2,a4,32 addi sp,sp,-16 add a2,a2,a4 li a5,0 xori a2,a2,-1 addi a7,sp,8 vsetivli zero,8,e8,mf2,ta,ma ###### here .L2: addi a3,a5,16 add t1,a0,a5 bltu a1,a3,.L36 vle8.v v1,0(t1) ##### addi a4,a5,8 add a4,a0,a4 vse8.v v1,0(sp) ##### vle8.v v1,0(a4) ##### ld a4,0(sp) vse8.v v1,0(a7) ##### ld a6,8(sp) or a4,a4,a6 and a4,a4,a2 bne a4,zero,.L36 mv a5,a3 .L6: ---