Hello, the attached patch allows wider mask types. Is it OK for trunk?
Bootstrapped. gcc/ * config/i386/i386.c (print_reg): Correctly print 64-bit mask registers. (inline_secondary_memory_needed): Allow 64-bit wide mask registers. (ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and xmm/ymm16+ when available. * config/i386/i386.h (HARD_REGNO_NREGS): Add mask regs. (VALID_AVX512F_REG_MODE): Add V4TImode. (VALID_AVX512VL_128_REG_MODE): Define. (VALID_MASK_AVX512BW_MODE): Ditto. (reg_class) (MASK_REG_P(X)): Define. * config/i386/i386.md: Do not split long moves with mask register, use kmovb if avx512bw is available. (movdi_internal): Handle mask registers. -- Thanks, K diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c77e8a6..5a3b67a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -14701,7 +14701,7 @@ print_reg (rtx x, int code, FILE *file) case 8: case 4: case 12: - if (! ANY_FP_REG_P (x)) + if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x)) putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); /* FALLTHRU */ case 16: @@ -37393,6 +37393,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) return true; + /* Between mask and general, we have moves no larger than word size. */ + if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2)) + && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) + return true; + /* ??? This is a lie. We do have moves between mmx/general, and for mmx/sse2. But by saying we need secondary memory we discourage the register allocator from using the mmx registers unless needed. 
*/ @@ -37698,7 +37703,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) if (STACK_REGNO_P (regno)) return VALID_FP_MODE_P (mode); if (MASK_REGNO_P (regno)) - return VALID_MASK_REG_MODE (mode); + return (VALID_MASK_REG_MODE (mode) + || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode))); if (SSE_REGNO_P (regno)) { /* We implement the move patterns for all vector modes into and @@ -37715,6 +37721,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) || VALID_AVX512F_SCALAR_MODE (mode))) return true; + /* TODO check for QI/HI scalars. */ + /* AVX512VL allows sse regs16+ for 128/256 bit modes. */ + if (TARGET_AVX512VL + && (mode == OImode + || mode == TImode + || VALID_AVX256_REG_MODE (mode) + || VALID_AVX512VL_128_REG_MODE (mode))) + return true; + /* xmm16-xmm31 are only available for AVX-512. */ if (EXT_REX_SSE_REGNO_P (regno)) return false; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8677e6b..c2f0cee 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1054,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); applied to them. */ #define HARD_REGNO_NREGS(REGNO, MODE) \ - (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \ + (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) \ + || MMX_REGNO_P (REGNO) || MASK_REGNO_P (REGNO) \ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \ : ((MODE) == XFmode \ ? (TARGET_64BIT ? 
2 : 3) \ @@ -1085,7 +1086,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_AVX512F_REG_MODE(MODE) \ ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ - || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode) + || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \ + || (MODE) == V4TImode) + +#define VALID_AVX512VL_128_REG_MODE(MODE) \ + ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \ + || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode) #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ @@ -1132,6 +1138,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode) +#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode) + /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ #define HARD_REGNO_MODE_OK(REGNO, MODE) \ @@ -1454,6 +1462,7 @@ enum reg_class : (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \ : (FIRST_EXT_REX_SSE_REG + (N) - 16)) +#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X))) #define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG) #define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X))) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3cb8b67..4867e7e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -952,6 +952,9 @@ ;; Instruction suffix for integer modes. (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) +;; Instruction suffix for masks. 
+(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")]) + ;; Pointer size prefix for integer modes (Intel asm dialect) (define_mode_attr iptrsize [(QI "BYTE") (HI "WORD") @@ -2022,13 +2025,16 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { + case TYPE_MSKMOV: + return "kmovq\t{%1, %0|%0, %1}"; + case TYPE_MULTI: return "#"; @@ -2099,7 +2105,7 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "0,1") (const_string "nox64") - (eq_attr "alternative" "2,3,4,5,10,11,16,18") + (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23") (const_string "x64") (eq_attr "alternative" "17") (const_string "x64_sse4") @@ -2118,6 +2124,8 @@ (const_string "ssemov") (eq_attr "alternative" "19,20") (const_string "ssecvt") + (eq_attr "alternative" "21,22,23,24") + (const_string "mskmov") (match_operand 1 "pic_32bit_operand") (const_string "lea") ] @@ -2179,16 +2187,20 @@ [(set (match_operand:DI 0 "nonimmediate_operand") (match_operand:DI 1 "general_operand"))] "!TARGET_64BIT && reload_completed - && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) - && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" + && !(MMX_REG_P (operands[0]) + || SSE_REG_P (operands[0]) + || MASK_REG_P (operands[0])) + && !(MMX_REG_P (operands[1]) + || SSE_REG_P (operands[1]) + || MASK_REG_P (operands[1]))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi") + "=r,m 
,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))] + "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2199,6 +2211,9 @@ return standard_sse_constant_opcode (insn, operands[1]); + case TYPE_MSKMOV: + return "kmovd\t{%1, %0|%0, %1}"; + case TYPE_SSEMOV: switch (get_attr_mode (insn)) { @@ -2262,6 +2277,8 @@ (const_string "sselog1") (eq_attr "alternative" "7,8,9,10,12") (const_string "ssemov") + (eq_attr "alternative" "13,14") + (const_string "mskmov") (match_operand 1 "pic_32bit_operand") (const_string "lea") ] @@ -2410,9 +2427,12 @@ case TYPE_MSKMOV: switch (which_alternative) { - case 7: return "kmovw\t{%k1, %0|%0, %k1}"; - case 8: return "kmovw\t{%1, %0|%0, %1}"; - case 9: return "kmovw\t{%1, %k0|%k0, %1}"; + case 7: return TARGET_AVX512BW ? "kmovb\t{%k1, %0|%0, %k1}" + : "kmovw\t{%k1, %0|%0, %k1}"; + case 8: return TARGET_AVX512BW ? "kmovb\t{%1, %0|%0, %1}" + : "kmovw\t{%1, %0|%0, %1}"; + case 9: return TARGET_AVX512BW ? "kmovb\t{%1, %k0|%k0, %1}" + : "kmovw\t{%1, %k0|%k0, %1}"; default: gcc_unreachable (); }