On Fri, Jan 31, 2014 at 08:43:37AM -0600, Richard Henderson wrote:
> Note that the optimizer cannot simplify ANDC X,Y,C to AND X,Y,~C
> so we must handle constants in the implementation of andc.
I do wonder if it wouldn't actually be a better idea to add this
simplification to the optimizer instead of to the backend. The best way
to do that would be to use tcg_target_const_match to check whether ANDC
accepts such a constant, and to convert the op to AND if it does not; a
rough sketch of what I mean is below. The same can probably be done for
ORC.
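To make the idea concrete, here is a throwaway standalone sketch
(illustration only, untested, not against any tree, and all names are
invented) that applies the same test as your new 'I' constraint and
rewrites ANDC x,C into AND x,~C when the constant would be rejected. In
the real thing the check would of course be tcg_target_const_match on
the constraint of andc's constant operand:

/* Illustration only, not QEMU code.  Here "andc" means x & ~c and
   "and" means x & c.  */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum op { OP_AND, OP_ANDC };

struct insn {
    enum op opc;
    int64_t c;                  /* constant operand */
};

/* The TCG_CT_CONST_I32 test from your patch: C is acceptable for ANDC
   iff ~C fits in a sign-extended 32-bit immediate, since the constant
   fallback is mov + "AND ~C".  */
static bool andc_const_ok(int64_t val)
{
    return ~val == (int32_t)~val;
}

/* The proposed optimizer rule: when the backend would reject C for
   ANDC, fold the complement into the constant and emit AND instead.  */
static void fold_andc(struct insn *i)
{
    if (i->opc == OP_ANDC && !andc_const_ok(i->c)) {
        i->opc = OP_AND;
        i->c = ~i->c;
    }
}

static int64_t run(const struct insn *i, int64_t x)
{
    return i->opc == OP_ANDC ? x & ~i->c : x & i->c;
}

int main(void)
{
    /* ~C == 0xfffffffe is zero-extendable but not sign-extendable, so
       the 'I' test rejects C and the rule fires.  */
    struct insn i = { OP_ANDC, (int64_t)0xffffffff00000001ULL };
    int64_t x = (int64_t)0x0123456789abcdefULL;
    int64_t before = run(&i, x);

    fold_andc(&i);
    assert(i.opc == OP_AND && run(&i, x) == before);
    printf("rewritten: and x, %#llx\n", (unsigned long long)i.c);
    return 0;
}

If I read tgen_arithi correctly, such a rewrite could even win on
x86_64: a C whose complement is only zero-extendable fails the 'I' test
for andc_i64, so the constant has to be loaded into a register for
ANDN, while the 'Z' constraint of and_i64 would still take ~C as an
immediate.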
> Signed-off-by: Richard Henderson <r...@twiddle.net>
> ---
>  tcg/i386/tcg-target.c | 52 ++++++++++++++++++++++++++++++++++++++++-----------
>  tcg/i386/tcg-target.h |  6 ++++--
>  2 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 00dbc3b..4f6b9c1 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -91,6 +91,7 @@ static const int tcg_target_call_oarg_regs[] = {
>  /* Constants we accept. */
>  #define TCG_CT_CONST_S32 0x100
>  #define TCG_CT_CONST_U32 0x200
> +#define TCG_CT_CONST_I32 0x400
>
>  /* Registers used with L constraint, which are the first argument
>     registers on x86_64, and two random call clobbered registers on
> @@ -128,6 +129,10 @@ static bool have_movbe;
>  # define have_movbe 0
>  #endif
>
> +/* We need this symbol in tcg-target.h, and we can't properly conditionalize
> +   it there. Therefore we always define the variable. */
> +bool have_bmi1;
> +
>  static uint8_t *tb_ret_addr;
>
>  static void patch_reloc(uint8_t *code_ptr, int type,
> @@ -224,6 +229,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>      case 'Z':
>          ct->ct |= TCG_CT_CONST_U32;
>          break;
> +    case 'I':
> +        ct->ct |= TCG_CT_CONST_I32;
> +        break;
>
>      default:
>          return -1;
> @@ -247,6 +255,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
>      if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
>          return 1;
>      }
> +    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
> +        return 1;
> +    }
>      return 0;
>  }
>
> @@ -276,6 +287,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_ARITH_EvIz  (0x81)
>  #define OPC_ARITH_EvIb  (0x83)
>  #define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
> +#define OPC_ANDN        (0xf2 | P_EXT38)
>  #define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
>  #define OPC_BSWAP       (0xc8 | P_EXT)
>  #define OPC_CALL_Jz     (0xe8)
> @@ -1813,6 +1825,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          }
>          break;
>
> +    OP_32_64(andc):
> +        if (const_args[2]) {
> +            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
> +                        args[0], args[1]);
> +            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
> +        } else {
> +            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
> +        }
> +        break;
> +
>      OP_32_64(mul):
>          if (const_args[2]) {
>              int32_t val;
> @@ -2041,6 +2063,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_and_i32, { "r", "0", "ri" } },
>      { INDEX_op_or_i32, { "r", "0", "ri" } },
>      { INDEX_op_xor_i32, { "r", "0", "ri" } },
> +    { INDEX_op_andc_i32, { "r", "r", "ri" } },
>
>      { INDEX_op_shl_i32, { "r", "0", "ci" } },
>      { INDEX_op_shr_i32, { "r", "0", "ci" } },
> @@ -2098,6 +2121,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_and_i64, { "r", "0", "reZ" } },
>      { INDEX_op_or_i64, { "r", "0", "re" } },
>      { INDEX_op_xor_i64, { "r", "0", "re" } },
> +    { INDEX_op_andc_i64, { "r", "r", "rI" } },
>
>      { INDEX_op_shl_i64, { "r", "0", "ci" } },
>      { INDEX_op_shr_i64, { "r", "0", "ci" } },
> @@ -2235,25 +2259,31 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>
>  static void tcg_target_init(TCGContext *s)
>  {
> -#if !(defined(have_cmov) && defined(have_movbe))
> -    {
> -        unsigned a, b, c, d;
> -        int ret = __get_cpuid(1, &a, &b, &c, &d);
> +    unsigned a, b, c, d;
> +    int max = __get_cpuid_max(0, 0);
>
> -# ifndef have_cmov
> +    if (max >= 1) {
> +        __cpuid(1, a, b, c, d);
> +#ifndef have_cmov
>          /* For 32-bit, 99% certainty that we're running on hardware that
>             supports cmov, but we still need to check. In case cmov is not
>             available, we'll use a small forward branch. */
> -        have_cmov = ret && (d & bit_CMOV);
> -# endif
> -
> -# ifndef have_movbe
> +        have_cmov = (d & bit_CMOV) != 0;
> +#endif
> +#ifndef have_movbe
>          /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
>             need to probe for it. */
> -        have_movbe = ret && (c & bit_MOVBE);
> -# endif
> +        have_movbe = (c & bit_MOVBE) != 0;
> +#endif
>      }
> +
> +    if (max >= 7) {
> +        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
> +        __cpuid_count(7, 0, a, b, c, d);
> +#ifdef bit_BMI
> +        have_bmi1 = (b & bit_BMI) != 0;
>  #endif
> +    }
>
>      if (TCG_TARGET_REG_BITS == 64) {
>          tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 747b797..bdf2222 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -73,6 +73,8 @@ typedef enum {
>  #define TCG_TARGET_CALL_STACK_OFFSET 0
>  #endif
>
> +extern bool have_bmi1;
> +
>  /* optional instructions */
>  #define TCG_TARGET_HAS_div2_i32 1
>  #define TCG_TARGET_HAS_rot_i32 1
> @@ -84,7 +86,7 @@ typedef enum {
>  #define TCG_TARGET_HAS_bswap32_i32 1
>  #define TCG_TARGET_HAS_neg_i32 1
>  #define TCG_TARGET_HAS_not_i32 1
> -#define TCG_TARGET_HAS_andc_i32 0
> +#define TCG_TARGET_HAS_andc_i32 have_bmi1
>  #define TCG_TARGET_HAS_orc_i32 0
>  #define TCG_TARGET_HAS_eqv_i32 0
>  #define TCG_TARGET_HAS_nand_i32 0
> @@ -112,7 +114,7 @@ typedef enum {
>  #define TCG_TARGET_HAS_bswap64_i64 1
>  #define TCG_TARGET_HAS_neg_i64 1
>  #define TCG_TARGET_HAS_not_i64 1
> -#define TCG_TARGET_HAS_andc_i64 0
> +#define TCG_TARGET_HAS_andc_i64 have_bmi1
>  #define TCG_TARGET_HAS_orc_i64 0
>  #define TCG_TARGET_HAS_eqv_i64 0
>  #define TCG_TARGET_HAS_nand_i64 0

Otherwise the patch looks good to me.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurel...@aurel32.net                 http://www.aurel32.net