https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108498
Bug ID: 108498 Summary: ppc64 big endian generates uninitialized reads with -fstore-merging Product: gcc Version: 12.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: kungfujesus06 at gmail dot com Target Milestone: --- It seems that populating one bit field among many with store merging enabled produces an access to uninitialized memory. Here's the minimal reproducer: ``` #include <stdio.h> #include <stdint.h> #define NVFX_FP_MASK_X 1 #define NVFX_FP_MASK_Y 2 #define NVFX_FP_MASK_Z 4 #define NVFX_FP_MASK_W 8 #define NVFX_FP_MASK_ALL 0xf #define NV40_FP_OP_OUT_NONE (1U << 30) #define NVFX_FP_OP_OPCODE_MUL 0x02 #define NVFX_COND_TR 7 #define NVFXSR_NONE 0 #define arith(s,o,d,m,s0,s1,s2) \ nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \ (d), (m), (s0), (s1), (s2)) struct nvfx_reg { int8_t type; int32_t index; }; struct nvfx_src { struct nvfx_reg reg; uint8_t indirect : 1; uint8_t indirect_reg : 1; uint8_t indirect_swz : 2; uint8_t negate : 1; uint8_t abs : 1; uint8_t swz[4]; }; struct nvfx_insn { uint8_t op; char scale; int8_t unit; uint8_t mask; uint8_t cc_swz[4]; uint8_t sat : 1; uint8_t cc_update : 1; uint8_t cc_update_reg : 1; uint8_t cc_test : 3; uint8_t cc_test_reg : 1; struct nvfx_reg dst; struct nvfx_src src[3]; }; static inline struct nvfx_insn nvfx_insn(uint8_t sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2) { struct nvfx_insn insn = { .op = op, .scale = 0, .unit = unit, .sat = sat, .mask = mask, .cc_update = 0, .cc_update_reg = 0, .cc_test = NVFX_COND_TR, .cc_test_reg = 0, .cc_swz = { 0, 1, 2, 3 }, .dst = dst, .src = {s0, s1, s2} }; return insn; } static inline struct nvfx_reg nvfx_reg(int type, int index) { struct nvfx_reg temp = { .type = type, .index = index, }; return temp; } static inline struct nvfx_src nvfx_src(struct nvfx_reg reg) { struct nvfx_src temp = { .reg = reg, .abs = 0, .negate = 0, .swz = { 0, 1, 2, 3 }, 
.indirect = 0, }; return temp; } struct nvfx_insn emit_test(void) { const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); struct nvfx_insn insn; struct nvfx_src src[2]; struct nvfx_reg tmp = {0, 1}; int mask, sat, unit = 0; int ai = -1, ci = -1, ii = -1; int i; src[0].reg.type = 0; src[0].reg.index = 2; src[1].reg.type = 4; src[1].reg.index = 8; return arith(0, MUL, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none); } int main(void) { struct nvfx_insn ins = emit_test(); printf("sat? = %d\n", ins.sat); } ``` This should print 0, but with -fstore-merging it often prints 1. Valgrind shows that it accesses uninitialized memory. The assembly output with that optimization disabled and enabled is in the bug report filed here: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8134