On 9/1/23 06:13, Jeff Law wrote:
I could very well be mistaken, but define_insn is a pattern match and opt2 has *ne* so the expression has to be in != form and thus needs to work with that condition. No ?
My point was  that

x = (a != 0) ? x : 0

is equivalent to

x = (a == 0) ? 0 : x

You can invert the condition and swap the arms and get the same semantics.  Thus if one can be supported, so can the other as they're functionally equivalent.

Ah I see what you mean. Indeed the pattern is fine, it just doesn't map to the right asm.
So we certainly need a fix but it could just very well be this:

(define_insn "*czero.nez.<GPR:mode><X:mode>.opt2"
  [(set (match_operand:GPR 0 "register_operand" "=r")
        (if_then_else:GPR (ne (match_operand:X 1 "register_operand" "r")
                              (const_int 0))
                          (match_operand:GPR 2 "register_operand" "r")
                          (match_operand:GPR 3 "register_operand" "1")))]
  "TARGET_ZICOND && rtx_equal_p (operands[1], operands[3])"
-  "czero.nez\t%0,%2,%1"
}  "czero.eqz\t%0,%2,%1"
)

It may be the at we've goof'd something in handling the inverted case, but conceptually we ought to be able to handle both.

Indeed there's a small goof as shown above.


I don't doubt you've got a failure, but it's also the case that I'm not seeing the same failure when I turn on zicond and run the execute.exp tests.  So clearly there's a difference somewhere in what we're doing.

It doesn't show up in execute.exp but as following (perhaps I should add that to commit log too).

FAIL: gcc.c-torture/execute/pr60003.c   -O1  execution test
FAIL: gcc.dg/setjmp-3.c execution test
FAIL: gcc.dg/torture/stackalign/setjmp-3.c   -O1  execution test
FAIL: gcc.dg/torture/stackalign/setjmp-3.c   -O1 -fpic execution test



So perhaps we should start with comparing assembly output for the test in question.  Can you pass yours along, I'll diff them this afternoon and see what we find.

Attached is slightly modified pr60003.c (to differentiate 'X' and 'a') and the failing asm and with fix (both the deleted pattern and modified pattern produce correct, if slightly different code).

Thx,
-Vineet
/* PR tree-optimization/60003 */
/* { dg-require-effective-target indirect_jumps } */

extern void abort (void);

unsigned long long jmp_buf[5];

__attribute__((noinline, noclone)) void
baz (void)
{
  __builtin_longjmp (&jmp_buf, 1);
}

void
bar (void)
{
  baz ();
}

__attribute__((noinline, noclone)) int
foo (int x)
{
  int a = 0;

  if (__builtin_setjmp (&jmp_buf) == 0)
    {
      while (1)
	{
	  a = 1;
	  bar ();  /* OK if baz () instead */
	}
    }
  else
    {
      if (a == 0)
	return 0;
      else
	return x;
    }
}

int
main ()
{
  if (foo (2) == 0)	// orig test has foo (1)
    return 1;

  return 0;
}
        .file   "pr60003.c"
        .option nopic
# GNU C17 (GCC) version 14.0.0 20230830 (experimental) (riscv-unknown-elf)
#       compiled by GNU C version 11.4.0, GMP version 6.1.0, MPFR version 
3.1.4, MPC version 1.0.3, isl version isl-0.18-GMP

# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -march=rv64gc_zba_zbb_zbs_zicond -mabi=lp64d -O1
        .text
        .align  1
        .globl  baz
        .type   baz, @function
baz:
        addi    sp,sp,-16       #,,
        sd      ra,8(sp)        #,
        sd      s0,0(sp)        #,
        addi    s0,sp,16        #,,
# pr60003.c:11:   __builtin_longjmp (&jmp_buf, 1);
        lui     a5,%hi(.LANCHOR0)       # tmp135,
        addi    a5,a5,%lo(.LANCHOR0)    # tmp134, tmp135,
        ld      a4,8(a5)                # tmp136,
        ld      a3,0(a5)                # tmp137,
        ld      sp,16(a5)               #,
        mv      s0,a3   #, tmp137
        jr      a4              # tmp136
        .size   baz, .-baz
        .align  1
        .globl  bar
        .type   bar, @function
bar:
        addi    sp,sp,-16       #,,
        sd      ra,8(sp)        #,
# pr60003.c:17:   baz ();
        call    baz             #
        .size   bar, .-bar
        .align  1
        .globl  foo
        .type   foo, @function
foo:
        addi    sp,sp,-224      #,,
        sd      ra,216(sp)      #,
        sd      s0,208(sp)      #,
        sd      s1,200(sp)      #,
        sd      s2,192(sp)      #,
        sd      s3,184(sp)      #,
        sd      s4,176(sp)      #,
        sd      s5,168(sp)      #,
        sd      s6,160(sp)      #,
        sd      s7,152(sp)      #,
        sd      s8,144(sp)      #,
        sd      s9,136(sp)      #,
        sd      s10,128(sp)     #,
        sd      s11,120(sp)     #,
        fsd     fs0,104(sp)     #,
        fsd     fs1,96(sp)      #,
        fsd     fs2,88(sp)      #,
        fsd     fs3,80(sp)      #,
        fsd     fs4,72(sp)      #,
        fsd     fs5,64(sp)      #,
        fsd     fs6,56(sp)      #,
        fsd     fs7,48(sp)      #,
        fsd     fs8,40(sp)      #,
        fsd     fs9,32(sp)      #,
        fsd     fs10,24(sp)     #,
        fsd     fs11,16(sp)     #,
        sd      a0,8(sp)        # tmp142, %sfp
# pr60003.c:25:   if (__builtin_setjmp (&jmp_buf) == 0)
        lui     a5,%hi(.LANCHOR0)       # tmp138,
        addi    a5,a5,%lo(.LANCHOR0)    #
        sd      s0,0(a5)        # ^^^^^^^ orig X (2) ^^^^^^^^^
        lui     a4,%hi(.L6)     #
        addi    a4,a4,%lo(.L6)  #
        sd      a4,8(a5)        #
        sd      sp,16(a5)       #,
# pr60003.c:17:   baz ();
        call    baz             #
.L6:
# pr60003.c:25:   if (__builtin_setjmp (&jmp_buf) == 0)
# pr60003.c:40: }
        li      a5,1                    # ^^^^        (1) ^^^^^^^^^
        ld      a4,8(sp)                # ^^^^ orig X (2) ^^^^^^^^^
        czero.nez       a0,a4,a5        # ^^^^ BUG since both ops are non zero
        ld      ra,216(sp)              #,
        ld      s0,208(sp)              #,
        ld      s1,200(sp)              #,
        ld      s2,192(sp)              #,
        ld      s3,184(sp)              #,
        ld      s4,176(sp)              #,
        ld      s5,168(sp)              #,
        ld      s6,160(sp)              #,
        ld      s7,152(sp)              #,
        ld      s8,144(sp)              #,
        ld      s9,136(sp)              #,
        ld      s10,128(sp)             #,
        ld      s11,120(sp)             #,
        fld     fs0,104(sp)     #,
        fld     fs1,96(sp)      #,
        fld     fs2,88(sp)      #,
        fld     fs3,80(sp)      #,
        fld     fs4,72(sp)      #,
        fld     fs5,64(sp)      #,
        fld     fs6,56(sp)      #,
        fld     fs7,48(sp)      #,
        fld     fs8,40(sp)      #,
        fld     fs9,32(sp)      #,
        fld     fs10,24(sp)     #,
        fld     fs11,16(sp)     #,
        addi    sp,sp,224       #,,
        jr      ra              #
        .size   foo, .-foo
        .align  1
        .globl  main
        .type   main, @function
main:
        addi    sp,sp,-16       #,,
        sd      ra,8(sp)        #,
# pr60003.c:45:   if (foo (2) == 0)     // orig test has foo (1)
        li      a0,2            #,
        call    foo             #
# pr60003.c:49: }
        seqz    a0,a0   #, tmp143
        ld      ra,8(sp)                #,
        addi    sp,sp,16        #,,
        jr      ra              #
        .size   main, .-main
        .globl  jmp_buf
        .bss
        .align  3
        .set    .LANCHOR0,. + 0
        .type   jmp_buf, @object
        .size   jmp_buf, 40
jmp_buf:
        .zero   40
        .ident  "GCC: (GNU) 14.0.0 20230830 (experimental)"
        .file   "pr60003.c"
        .option nopic
        .text
        .align  1
        .globl  baz
        .type   baz, @function
baz:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        ld      a4,8(a5)
        ld      a3,0(a5)
        ld      sp,16(a5)
        mv      s0,a3
        jr      a4
        .size   baz, .-baz
        .align  1
        .globl  bar
        .type   bar, @function
bar:
        addi    sp,sp,-16
        sd      ra,8(sp)
        call    baz
        .size   bar, .-bar
        .align  1
        .globl  foo
        .type   foo, @function
foo:
        addi    sp,sp,-224
        sd      ra,216(sp)
        sd      s0,208(sp)
        sd      s1,200(sp)
        sd      s2,192(sp)
        sd      s3,184(sp)
        sd      s4,176(sp)
        sd      s5,168(sp)
        sd      s6,160(sp)
        sd      s7,152(sp)
        sd      s8,144(sp)
        sd      s9,136(sp)
        sd      s10,128(sp)
        sd      s11,120(sp)
        fsd     fs0,104(sp)
        fsd     fs1,96(sp)
        fsd     fs2,88(sp)
        fsd     fs3,80(sp)
        fsd     fs4,72(sp)
        fsd     fs5,64(sp)
        fsd     fs6,56(sp)
        fsd     fs7,48(sp)
        fsd     fs8,40(sp)
        fsd     fs9,32(sp)
        fsd     fs10,24(sp)
        fsd     fs11,16(sp)
        sd      a0,8(sp)
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        sd      s0,0(a5)
        lui     a4,%hi(.L6)
        addi    a4,a4,%lo(.L6)
        sd      a4,8(a5)
        sd      sp,16(a5)
        call    baz
.L6:
        li      a5,1
        ld      a4,8(sp)
        czero.eqz       a0,a4,a5
        ld      ra,216(sp)
        ld      s0,208(sp)
        ld      s1,200(sp)
        ld      s2,192(sp)
        ld      s3,184(sp)
        ld      s4,176(sp)
        ld      s5,168(sp)
        ld      s6,160(sp)
        ld      s7,152(sp)
        ld      s8,144(sp)
        ld      s9,136(sp)
        ld      s10,128(sp)
        ld      s11,120(sp)
        fld     fs0,104(sp)
        fld     fs1,96(sp)
        fld     fs2,88(sp)
        fld     fs3,80(sp)
        fld     fs4,72(sp)
        fld     fs5,64(sp)
        fld     fs6,56(sp)
        fld     fs7,48(sp)
        fld     fs8,40(sp)
        fld     fs9,32(sp)
        fld     fs10,24(sp)
        fld     fs11,16(sp)
        addi    sp,sp,224
        jr      ra
        .size   foo, .-foo
        .align  1
        .globl  main
        .type   main, @function
main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        li      a0,2
        call    foo
        seqz    a0,a0
        ld      ra,8(sp)
        addi    sp,sp,16
        jr      ra
        .size   main, .-main
        .globl  jmp_buf
        .bss
        .align  3
        .set    .LANCHOR0,. + 0
        .type   jmp_buf, @object
        .size   jmp_buf, 40
jmp_buf:
        .zero   40
        .ident  "GCC: (GNU) 14.0.0 20230825 (experimental)"

Reply via email to