https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108498
Bug ID: 108498
Summary: ppc64 big endian generates uninitialized reads with
-fstore-merging
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
Assignee: unassigned at gcc dot gnu.org
Reporter: kungfujesus06 at gmail dot com
Target Milestone: ---
It seems that initializing one of several adjacent bit-fields in a struct with
store merging enabled produces a read of uninitialized memory. Here's the
minimal reproducer:
```
#include <stdio.h>
#include <stdint.h>
#define NVFX_FP_MASK_X 1
#define NVFX_FP_MASK_Y 2
#define NVFX_FP_MASK_Z 4
#define NVFX_FP_MASK_W 8
#define NVFX_FP_MASK_ALL 0xf
#define NV40_FP_OP_OUT_NONE (1U << 30)
#define NVFX_FP_OP_OPCODE_MUL 0x02
#define NVFX_COND_TR 7
#define NVFXSR_NONE 0
#define arith(s,o,d,m,s0,s1,s2) \
nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \
(d), (m), (s0), (s1), (s2))
/* A register reference: an 8-bit type tag followed by a 32-bit index.
 * NOTE(review): on typical ABIs there will be padding between the two
 * members - confirm for the target if it matters to the analysis. */
struct nvfx_reg {
int8_t type;
int32_t index;
};
/* A source operand: a register plus six bits of uint8_t bit-field flags
 * (1 + 1 + 2 + 1 + 1) and a four-entry swizzle array. */
struct nvfx_src {
struct nvfx_reg reg;
uint8_t indirect : 1;
uint8_t indirect_reg : 1;
uint8_t indirect_swz : 2;
uint8_t negate : 1;
uint8_t abs : 1;
uint8_t swz[4];
};
/* An instruction record. After four byte-sized members and the cc_swz array
 * come seven bits of uint8_t bit-fields (1 + 1 + 1 + 3 + 1); `sat` is the
 * first of them and is the field main() prints. These are followed by the
 * destination register and three source operands. */
struct nvfx_insn
{
uint8_t op;
char scale;
int8_t unit;
uint8_t mask;
uint8_t cc_swz[4];
uint8_t sat : 1;
uint8_t cc_update : 1;
uint8_t cc_update_reg : 1;
uint8_t cc_test : 3;
uint8_t cc_test_reg : 1;
struct nvfx_reg dst;
struct nvfx_src src[3];
};
/* Build a struct nvfx_insn by value.
 *
 * sat, op, unit, dst and mask are stored into the members of the same name
 * (op, unit and mask are narrowed to their 8-bit members; sat to a 1-bit
 * field). s0, s1 and s2 are copied into src[0..2]. The remaining members get
 * fixed values: scale = 0, cc_update = 0, cc_update_reg = 0,
 * cc_test = NVFX_COND_TR, cc_test_reg = 0, cc_swz = {0, 1, 2, 3}.
 *
 * The function shares its name with the struct tag; tags live in a separate
 * namespace in C, so this is legal.
 *
 * The bit-field initializers here are adjacent stores into one byte, which
 * is the situation the bug report attributes to -fstore-merging. */
static inline struct nvfx_insn
nvfx_insn(uint8_t sat, unsigned op, int unit, struct nvfx_reg dst, unsigned
mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
{
struct nvfx_insn insn = {
.op = op,
.scale = 0,
.unit = unit,
.sat = sat,
.mask = mask,
.cc_update = 0,
.cc_update_reg = 0,
.cc_test = NVFX_COND_TR,
.cc_test_reg = 0,
.cc_swz = { 0, 1, 2, 3 },
.dst = dst,
.src = {s0, s1, s2}
};
return insn;
}
/* Build a struct nvfx_reg by value from a type and an index.
 * `type` is an int narrowed to the int8_t member. */
static inline struct nvfx_reg
nvfx_reg(int type, int index)
{
struct nvfx_reg temp = {
.type = type,
.index = index,
};
return temp;
}
/* Build a struct nvfx_src by value that wraps `reg`, with abs, negate and
 * indirect cleared and swz = {0, 1, 2, 3}. indirect_reg and indirect_swz are
 * not named in the initializer, so they are implicitly zero-initialized. */
static inline struct nvfx_src
nvfx_src(struct nvfx_reg reg)
{
struct nvfx_src temp = {
.reg = reg,
.abs = 0,
.negate = 0,
.swz = { 0, 1, 2, 3 },
.indirect = 0,
};
return temp;
}
/* Build and return a MUL instruction whose `sat` argument is the literal 0,
 * so the returned struct's `sat` bit-field is expected to be 0.
 *
 * The code is deliberately left in its reduced form: several locals are
 * unused, and src[0]/src[1] are only partially assigned. Do not tidy this up
 * without re-checking that the miscompile still reproduces. */
struct nvfx_insn emit_test(void)
{
/* Fully initialized "none" source, passed as the third operand. */
const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
/* Unused in this function. */
struct nvfx_insn insn;
struct nvfx_src src[2];
/* Destination register: type 0, index 1. */
struct nvfx_reg tmp = {0, 1};
/* None of the locals on the next three lines are read below; arith()
 * supplies its own unit value (-1). */
int mask, sat, unit = 0;
int ai = -1, ci = -1, ii = -1;
int i;
/* Only reg.type and reg.index are assigned for each source; the bit-field
 * flags and swz[] of src[0] and src[1] are left unassigned and are copied
 * as-is when the structs are passed by value below. */
src[0].reg.type = 0;
src[0].reg.index = 2;
src[1].reg.type = 4;
src[1].reg.index = 8;
return arith(0, MUL, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1],
none);
}
/* Print the `sat` bit-field of the instruction returned by emit_test().
 * emit_test() passes 0 for sat, so the expected output is "sat? = 0". */
int main(void)
{
struct nvfx_insn ins = emit_test();
printf("sat? = %d\n", ins.sat);
}
```
This should print 0, but with -fstore-merging it often prints 1. Valgrind
shows that it accesses uninitialized memory. The assembly generated with that
optimization disabled and enabled is in the bug report filed here:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/8134