https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118952

            Bug ID: 118952
           Summary: AArch64 get_fpcr and set_fpcr builtins don't block
                    reordering of operations past them
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Keywords: wrong-code
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

The __builtin_aarch64_set_fpcr and __builtin_aarch64_get_fpcr builtins are not
as useful in practice as we'd like, because floating-point operations can be
reordered across them. For the input code:

#include <stdint.h>
#include <string.h>

uint64_t foo (uint32_t *in_fpcr, uint32_t src) {  /* Reproducer: convert the float whose bits are in src to double while *in_fpcr is installed as FPCR; returns the double's bits. */
    uint64_t dst;         /* bit pattern of the converted double (the return value) */
    uint32_t saved_fpcr;  /* FPCR value read on entry, written back before returning */
    float fsrc;           /* the 4 bytes of src reinterpreted as a float */

    saved_fpcr = __builtin_aarch64_get_fpcr();  /* remember the FPCR value on entry */
    __builtin_aarch64_set_fpcr(*in_fpcr);       /* install the caller-supplied FPCR value */

    memcpy(&fsrc, &src, 4);    /* type-pun uint32_t -> float via memcpy */
    double d = (double) fsrc;  /* the conversion that is meant to run between the two set_fpcr calls */
    memcpy(&dst, &d, 8);       /* type-pun double -> uint64_t via memcpy */

    *in_fpcr = __builtin_aarch64_get_fpcr();  /* hand the FPCR value after the conversion back to the caller */
    __builtin_aarch64_set_fpcr(saved_fpcr);   /* restore the FPCR value saved on entry */
    return dst;
}

at -O2 we get:
foo:
        fmov    s31, w1
        mrs     x1, fpcr
        ldr     w2, [x0]
        msr     fpcr, x2
        mrs     x2, fpcr
        str     w2, [x0]
        msr     fpcr, x1
        fcvt    d31, s31
        fmov    x0, d31
        ret

The problem is that the fcvt is moved outside the region that has a modified
FPCR, defeating the purpose of the builtins.
I initially thought the RTL insn scheduler was moving the operations, but the
RTL patterns for the builtins use unspec_volatile, which is supposed to
prevent such movement.
Instead, the problem seems to arise at expand time. The GIMPLE looks correct:

  saved_fpcr_6 = __builtin_aarch64_get_fpcr ();
  _1 = *in_fpcr_7(D);
  __builtin_aarch64_set_fpcr (_1);
  _14 = VIEW_CONVERT_EXPR<float>(src_9(D));
  _2 = (double) _14;
  _10 = VIEW_CONVERT_EXPR<unsigned long>(_2);
  _3 = __builtin_aarch64_get_fpcr ();
  *in_fpcr_7(D) = _3;
  __builtin_aarch64_set_fpcr (saved_fpcr_6);
  return _10;

but the RTL generation is:
(insn 2 5 3 2 (set (reg/v/f:DI 107 [ in_fpcr ])
        (reg:DI 0 x0 [ in_fpcr ])) "fpcr.c":4:48 -1
     (nil))
(insn 3 2 4 2 (set (reg/v:SI 108 [ src ])
        (reg:SI 1 x1 [ src ])) "fpcr.c":4:48 -1
     (nil))
(note 4 3 7 2 NOTE_INSN_FUNCTION_BEG)
(insn 7 4 8 2 (set (reg/v:SI 104 [ saved_fpcr ])
        (unspec_volatile:SI [
                (const_int 0 [0])
            ] UNSPECV_GET_FPCR)) "fpcr.c":9:18 -1
     (nil))
(insn 8 7 9 2 (set (reg:SI 109)
        (mem:SI (reg/v/f:DI 107 [ in_fpcr ]) [1 *in_fpcr_7(D)+0 S4 A32])) "fpcr.c":10:5 -1
     (nil))
(insn 9 8 10 2 (unspec_volatile [
            (reg:SI 109)
        ] UNSPECV_SET_FPCR) "fpcr.c":10:5 -1
     (nil))
(insn 10 9 11 2 (set (reg:SI 103 [ _3 ])
        (unspec_volatile:SI [
                (const_int 0 [0])
            ] UNSPECV_GET_FPCR)) "fpcr.c":16:16 -1
     (nil))
(insn 11 10 12 2 (set (mem:SI (reg/v/f:DI 107 [ in_fpcr ]) [1 *in_fpcr_7(D)+0 S4 A32])
        (reg:SI 103 [ _3 ])) "fpcr.c":16:14 discrim 1 -1
     (nil))
(insn 12 11 13 2 (unspec_volatile [
            (reg/v:SI 104 [ saved_fpcr ])
        ] UNSPECV_SET_FPCR) "fpcr.c":17:5 -1
     (nil))
(insn 13 12 14 2 (set (reg:DF 111 [ _2 ])
        (float_extend:DF (subreg:SF (reg/v:SI 108 [ src ]) 0))) "fpcr.c":13:16 -1
     (nil))
(insn 14 13 18 2 (set (reg:DI 106 [ <retval> ])
        (subreg:DI (reg:DF 111 [ _2 ]) 0)) "fpcr.c":18:12 -1
     (nil))

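Insn 13 (the float_extend, i.e. the fcvt) has been emitted after the second
GET_FPCR (insn 10) and the final SET_FPCR (insn 12), even though the
corresponding GIMPLE statement comes before both. Is that something the
out-of-ssa code is doing?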

Reply via email to