On Thu, May 8, 2025 at 2:40 PM liuhongt <hongtao....@intel.com> wrote: > > The only part I changed is related to size_cost of sse_to_ineteger, as below > > 114+ /* Under TARGET_SSE4_1, it's vmovd + vpextrd/vpinsrd. > 115+ W/o it, it's movd + psrlq/unpckldq + movd. */ > 116+ else if (!TARGET_64BIT && smode != SImode) > 117+ cost *= TARGET_SSE4_1 ? 2 : 3; > 118+ Sorry, I missed posting a paragraph. > There is rtl_ssa::changes_are_worthwhile which solves similar problem. > Logic there seems OK to me. It computes unscaled cost in integers and > weighted cost that is updated only for hot BBs > > After both set of costs are computed, if the weighted cost indicates > that replacement is worthwhile, it will be done. > > This works well for chains that contains some portions in cold BBs with > frequency of 0 as well as for those that have everything in cold BBs. I haven't changed STV to rtl-ssa, but introduce estimated_cost_{gain, weighted_cost_sse_integer) which make the same logic as rtl_ssa::changes_are_worthwhile, meanwhile optimized_for_size is replaced with !speed_p. There are some costs using magic numbers(mainly the for_size part) which I didn't change, except for the part mentioned above. For the cost using ix86_cost->XXX, I replace them with ix86_size_cost->XXX when !speed_p.
Looks like you've updated the patch in https://gcc.gnu.org/pipermail/gcc-patches/2025-May/683218.html, I'll rebase my patch against that and post the patch again. >I also realize it would need more work, but what the pass does is quite >close to the job of late combine pass. Since late combine has all the >substitution logic nicely separated into rtl-ssa framework, converting >STv to rtl-ssa as well will likely make it easier and faster... I haven't change STV to rtl-ssa, but introduce > > Ok for trunk? > > > n some benchmark, I notice stv failed due to cost unprofitable, but the igain > is inside the loop, but sse<->integer conversion is outside the loop, current > cost > model doesn't consider the frequency of those gain/cost. > The patch weights those cost with frequency. > > The patch regressed gcc.target/i386/minmax-6.c under -m32 Since the > place of integer<->sse is before the branch, and the conversion to > min/max is in the branch, with static profile, the cost model is not > profitable anymore which is exactly the patch try to do. > Considering the original testcase is to guard RA issue, so restrict > the testcase under ! ia32 should still be ok. > > gcc/ChangeLog: > > * config/i386/i386-features.cc > (scalar_chain::mark_dual_mode_def): Weight > n_integer_to_sse/n_sse_to_integer with bb frequency. > (general_scalar_chain::compute_convert_gain): Ditto, and > adjust function prototype to return true/false when cost model > is profitable or not. > (timode_scalar_chain::compute_convert_gain): Ditto. > (convert_scalars_to_vector): Adjust after the upper two > function prototype are changed. > * config/i386/i386-features.h (class scalar_chain): Change > n_integer_to_sse/n_sse_to_integer to cost_sse_integer, and add > weighted_cost_sse_integer. > (class general_scalar_chain): Adjust prototype to return bool > intead of int. > (class timode_scalar_chain): Ditto. > > gcc/testsuite/ChangeLog: > * gcc.target/i386/minmax-6.c: Adjust testcase. > --- > gcc/config/i386/i386-features.cc | 216 ++++++++++++----------- > gcc/config/i386/i386-features.h | 11 +- > gcc/testsuite/gcc.target/i386/minmax-6.c | 2 +- > 3 files changed, 124 insertions(+), 105 deletions(-) > > diff --git a/gcc/config/i386/i386-features.cc > b/gcc/config/i386/i386-features.cc > index c35ac24fd8a..5f21130db58 100644 > --- a/gcc/config/i386/i386-features.cc > +++ b/gcc/config/i386/i386-features.cc > @@ -296,8 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, > enum machine_mode vmode_) > insns_conv = BITMAP_ALLOC (NULL); > queue = NULL; > > - n_sse_to_integer = 0; > - n_integer_to_sse = 0; > + cost_sse_integer = 0; > + weighted_cost_sse_integer = 0 ; > > max_visits = x86_stv_max_visits; > } > @@ -337,20 +337,40 @@ scalar_chain::mark_dual_mode_def (df_ref def) > /* Record the def/insn pair so we can later efficiently iterate over > the defs to convert on insns not in the chain. */ > bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); > + basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def)); > + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; > + bool speed_p = optimize_bb_for_speed_p (bb); > + sreal bb_freq = bb->count.to_sreal_scale (entry_count); > + int cost = 0; > + > if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def))) > { > if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def)) > && !reg_new) > return; > - n_integer_to_sse++; > + > + /* ??? integer_to_sse but we only have that in the RA cost table. > + Assume sse_to_integer/integer_to_sse are the same which they. */ > + cost = speed_p ? ix86_cost->sse_to_integer > + : ix86_size_cost.sse_to_integer; > } > else > { > if (!reg_new) > return; > - n_sse_to_integer++; > + cost = speed_p ? ix86_cost->sse_to_integer > + : ix86_size_cost.sse_to_integer; > } > > + if (speed_p) > + weighted_cost_sse_integer += bb_freq * cost; > + /* Under TARGET_SSE4_1, it's vmovd + vpextrd/vpinsrd. > + W/o it, it's movd + psrlq/unpckldq + movd. */ > + else if (!TARGET_64BIT && smode != SImode) > + cost *= TARGET_SSE4_1 ? 2 : 3; > + > + cost_sse_integer += cost; > + > if (dump_file) > fprintf (dump_file, > " Mark r%d def in insn %d as requiring both modes in chain > #%d\n", > @@ -529,15 +549,15 @@ general_scalar_chain::vector_const_cost (rtx exp) > return ix86_cost->sse_load[smode == DImode ? 1 : 0]; > } > > -/* Compute a gain for chain conversion. */ > +/* Return true if it's cost profitable for chain conversion. */ > > -int > +bool > general_scalar_chain::compute_convert_gain () > { > bitmap_iterator bi; > unsigned insn_uid; > int gain = 0; > - int cost = 0; > + sreal weighted_gain = 0; > > if (dump_file) > fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); > @@ -556,25 +576,30 @@ general_scalar_chain::compute_convert_gain () > rtx src = SET_SRC (def_set); > rtx dst = SET_DEST (def_set); > int igain = 0; > + basic_block bb = BLOCK_FOR_INSN (insn); > + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; > + bool speed_p = optimize_bb_for_speed_p (bb); > + sreal bb_freq = bb->count.to_sreal_scale (entry_count); > + const struct processor_costs* uarch_cost > + = speed_p ? ix86_cost : &ix86_size_cost; > > if (REG_P (src) && REG_P (dst)) > - igain += 2 * m - ix86_cost->xmm_move; > + igain += 2 * m - uarch_cost->xmm_move; > else if (REG_P (src) && MEM_P (dst)) > igain > - += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]; > + += m * uarch_cost->int_store[2] - > uarch_cost->sse_store[sse_cost_idx]; > else if (MEM_P (src) && REG_P (dst)) > - igain += m * ix86_cost->int_load[2] - > ix86_cost->sse_load[sse_cost_idx]; > + igain += m * uarch_cost->int_load[2] - > uarch_cost->sse_load[sse_cost_idx]; > else > { > /* For operations on memory operands, include the overhead > of explicit load and store instructions. */ > if (MEM_P (dst)) > - igain += optimize_insn_for_size_p () > - ? -COSTS_N_BYTES (8) > - : (m * (ix86_cost->int_load[2] > - + ix86_cost->int_store[2]) > - - (ix86_cost->sse_load[sse_cost_idx] + > - ix86_cost->sse_store[sse_cost_idx])); > + igain += !speed_p ? -COSTS_N_BYTES (8) > + : (m * (uarch_cost->int_load[2] > + + uarch_cost->int_store[2]) > + - (uarch_cost->sse_load[sse_cost_idx] + > + uarch_cost->sse_store[sse_cost_idx])); > > switch (GET_CODE (src)) > { > @@ -584,15 +609,15 @@ general_scalar_chain::compute_convert_gain () > if (m == 2) > { > if (INTVAL (XEXP (src, 1)) >= 32) > - igain += ix86_cost->add; > + igain += uarch_cost->add; > /* Gain for extend highpart case. */ > else if (GET_CODE (XEXP (src, 0)) == ASHIFT) > - igain += ix86_cost->shift_const - ix86_cost->sse_op; > + igain += uarch_cost->shift_const - uarch_cost->sse_op; > else > - igain += ix86_cost->shift_const; > + igain += uarch_cost->shift_const; > } > > - igain += ix86_cost->shift_const - ix86_cost->sse_op; > + igain += uarch_cost->shift_const - uarch_cost->sse_op; > > if (CONST_INT_P (XEXP (src, 0))) > igain -= vector_const_cost (XEXP (src, 0)); > @@ -600,23 +625,23 @@ general_scalar_chain::compute_convert_gain () > > case ROTATE: > case ROTATERT: > - igain += m * ix86_cost->shift_const; > + igain += m * uarch_cost->shift_const; > if (TARGET_AVX512VL) > - igain -= ix86_cost->sse_op; > + igain -= uarch_cost->sse_op; > else if (smode == DImode) > { > int bits = INTVAL (XEXP (src, 1)); > if ((bits & 0x0f) == 0) > - igain -= ix86_cost->sse_op; > + igain -= uarch_cost->sse_op; > else if ((bits & 0x07) == 0) > - igain -= 2 * ix86_cost->sse_op; > + igain -= 2 * uarch_cost->sse_op; > else > - igain -= 3 * ix86_cost->sse_op; > + igain -= 3 * uarch_cost->sse_op; > } > else if (INTVAL (XEXP (src, 1)) == 16) > - igain -= ix86_cost->sse_op; > + igain -= uarch_cost->sse_op; > else > - igain -= 2 * ix86_cost->sse_op; > + igain -= 2 * uarch_cost->sse_op; > break; > > case AND: > @@ -624,11 +649,11 @@ general_scalar_chain::compute_convert_gain () > case XOR: > case PLUS: > case MINUS: > - igain += m * ix86_cost->add - ix86_cost->sse_op; > + igain += m * uarch_cost->add - uarch_cost->sse_op; > /* Additional gain for andnot for targets without BMI. */ > if (GET_CODE (XEXP (src, 0)) == NOT > && !TARGET_BMI) > - igain += m * ix86_cost->add; > + igain += m * uarch_cost->add; > > if (CONST_INT_P (XEXP (src, 0))) > igain -= vector_const_cost (XEXP (src, 0)); > @@ -636,21 +661,21 @@ general_scalar_chain::compute_convert_gain () > igain -= vector_const_cost (XEXP (src, 1)); > if (MEM_P (XEXP (src, 1))) > { > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > igain -= COSTS_N_BYTES (m == 2 ? 3 : 5); > else > - igain += m * ix86_cost->int_load[2] > - - ix86_cost->sse_load[sse_cost_idx]; > + igain += m * uarch_cost->int_load[2] > + - uarch_cost->sse_load[sse_cost_idx]; > } > break; > > case NEG: > case NOT: > - igain -= ix86_cost->sse_op + COSTS_N_INSNS (1); > + igain -= uarch_cost->sse_op + COSTS_N_INSNS (1); > > if (GET_CODE (XEXP (src, 0)) != ABS) > { > - igain += m * ix86_cost->add; > + igain += m * uarch_cost->add; > break; > } > /* FALLTHRU */ > @@ -662,9 +687,9 @@ general_scalar_chain::compute_convert_gain () > case UMIN: > /* We do not have any conditional move cost, estimate it as a > reg-reg move. Comparisons are costed as adds. */ > - igain += m * (COSTS_N_INSNS (2) + ix86_cost->add); > + igain += m * (COSTS_N_INSNS (2) + uarch_cost->add); > /* Integer SSE ops are all costed the same. */ > - igain -= ix86_cost->sse_op; > + igain -= uarch_cost->sse_op; > break; > > case COMPARE: > @@ -698,7 +723,7 @@ general_scalar_chain::compute_convert_gain () > case CONST_INT: > if (REG_P (dst)) > { > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > { > /* xor (2 bytes) vs. xorps (3 bytes). */ > if (src == const0_rtx) > @@ -727,8 +752,8 @@ general_scalar_chain::compute_convert_gain () > } > else if (MEM_P (dst)) > { > - igain += (m * ix86_cost->int_store[2] > - - ix86_cost->sse_store[sse_cost_idx]); > + igain += (m * uarch_cost->int_store[2] > + - uarch_cost->sse_store[sse_cost_idx]); > igain -= vector_const_cost (src); > } > break; > @@ -737,16 +762,16 @@ general_scalar_chain::compute_convert_gain () > if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx) > { > // movd (4 bytes) replaced with movdqa (4 bytes). > - if (!optimize_insn_for_size_p ()) > - igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move; > + if (speed_p) > + igain += uarch_cost->sse_to_integer - > uarch_cost->xmm_move; > } > else > { > // pshufd; movd replaced with pshufd. > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > igain += COSTS_N_BYTES (4); > else > - igain += ix86_cost->sse_to_integer; > + igain += uarch_cost->sse_to_integer; > } > break; > > @@ -755,55 +780,35 @@ general_scalar_chain::compute_convert_gain () > } > } > > + if (speed_p) > + weighted_gain += bb_freq * igain; > if (igain != 0 && dump_file) > { > - fprintf (dump_file, " Instruction gain %d for ", igain); > + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for", > + igain, bb_freq.to_double ()); > dump_insn_slim (dump_file, insn); > } > gain += igain; > } > > if (dump_file) > - fprintf (dump_file, " Instruction conversion gain: %d\n", gain); > - > - /* Cost the integer to sse and sse to integer moves. */ > - if (!optimize_function_for_size_p (cfun)) > - { > - cost += n_sse_to_integer * ix86_cost->sse_to_integer; > - /* ??? integer_to_sse but we only have that in the RA cost table. > - Assume sse_to_integer/integer_to_sse are the same which they > - are at the moment. */ > - cost += n_integer_to_sse * ix86_cost->sse_to_integer; > - } > - else if (TARGET_64BIT || smode == SImode) > - { > - cost += n_sse_to_integer * COSTS_N_BYTES (4); > - cost += n_integer_to_sse * COSTS_N_BYTES (4); > - } > - else if (TARGET_SSE4_1) > { > - /* vmovd (4 bytes) + vpextrd (6 bytes). */ > - cost += n_sse_to_integer * COSTS_N_BYTES (10); > - /* vmovd (4 bytes) + vpinsrd (6 bytes). */ > - cost += n_integer_to_sse * COSTS_N_BYTES (10); > - } > - else > - { > - /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ > - cost += n_sse_to_integer * COSTS_N_BYTES (13); > - /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ > - cost += n_integer_to_sse * COSTS_N_BYTES (12); > - } > - > - if (dump_file) > - fprintf (dump_file, " Registers conversion cost: %d\n", cost); > + fprintf (dump_file, " Instruction conversion gain: %d, \n", > + gain); > + fprintf (dump_file, " Registers conversion cost: %d\n", > + cost_sse_integer); > > - gain -= cost; > + fprintf (dump_file, " Weighted instruction conversion gain: %.2f, \n", > + weighted_gain.to_double ()); > > - if (dump_file) > - fprintf (dump_file, " Total gain: %d\n", gain); > + fprintf (dump_file, " Weighted registers conversion cost: %.2f\n", > + weighted_cost_sse_integer.to_double ()); > + } > > - return gain; > + if (weighted_gain != weighted_cost_sse_integer) > + return weighted_gain > weighted_cost_sse_integer; > + else > + return gain > cost_sse_integer;; > } > > /* Insert generated conversion instruction sequence INSNS > @@ -1520,21 +1525,22 @@ timode_immed_const_gain (rtx cst) > return 0; > } > > -/* Compute a gain for chain conversion. */ > +/* Return true it's cost profitable for for chain conversion. */ > > -int > +bool > timode_scalar_chain::compute_convert_gain () > { > /* Assume that if we have to move TImode values between units, > then transforming this chain isn't worth it. */ > - if (n_sse_to_integer || n_integer_to_sse) > - return -1; > + if (cost_sse_integer) > + return false; > > bitmap_iterator bi; > unsigned insn_uid; > > /* Split ties to prefer V1TImode when not optimizing for size. */ > int gain = optimize_size ? 0 : 1; > + sreal weighted_gain = 0; > > if (dump_file) > fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); > @@ -1549,31 +1555,35 @@ timode_scalar_chain::compute_convert_gain () > int scost, vcost; > int igain = 0; > > + basic_block bb = BLOCK_FOR_INSN (insn); > + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; > + bool speed_p = optimize_bb_for_speed_p (bb); > + sreal bb_freq = bb->count.to_sreal_scale (entry_count); > + > switch (GET_CODE (src)) > { > case REG: > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3); > else > igain = COSTS_N_INSNS (1); > break; > > case MEM: > - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7) > + igain = !speed_p ? COSTS_N_BYTES (7) > : COSTS_N_INSNS (1); > break; > > case CONST_INT: > if (MEM_P (dst) > && standard_sse_constant_p (src, V1TImode)) > - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1; > + igain = !speed_p ? COSTS_N_BYTES (11) : 1; > break; > > case CONST_WIDE_INT: > /* 2 x mov vs. vmovdqa. */ > if (MEM_P (dst)) > - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3) > - : COSTS_N_INSNS (1); > + igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1); > break; > > case NOT: > @@ -1594,7 +1604,7 @@ timode_scalar_chain::compute_convert_gain () > case LSHIFTRT: > /* See ix86_expand_v1ti_shift. */ > op1val = INTVAL (XEXP (src, 1)); > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > { > if (op1val == 64 || op1val == 65) > scost = COSTS_N_BYTES (5); > @@ -1628,7 +1638,7 @@ timode_scalar_chain::compute_convert_gain () > case ASHIFTRT: > /* See ix86_expand_v1ti_ashiftrt. */ > op1val = INTVAL (XEXP (src, 1)); > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > { > if (op1val == 64 || op1val == 127) > scost = COSTS_N_BYTES (7); > @@ -1706,7 +1716,7 @@ timode_scalar_chain::compute_convert_gain () > case ROTATERT: > /* See ix86_expand_v1ti_rotate. */ > op1val = INTVAL (XEXP (src, 1)); > - if (optimize_insn_for_size_p ()) > + if (!speed_p) > { > scost = COSTS_N_BYTES (13); > if ((op1val & 31) == 0) > @@ -1738,15 +1748,15 @@ timode_scalar_chain::compute_convert_gain () > { > if (GET_CODE (XEXP (src, 0)) == AND) > /* and;and;or (9 bytes) vs. ptest (5 bytes). */ > - igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4) > + igain = !speed_p ? COSTS_N_BYTES (4) > : COSTS_N_INSNS (2); > /* or (3 bytes) vs. ptest (5 bytes). */ > - else if (optimize_insn_for_size_p ()) > + else if (!speed_p) > igain = -COSTS_N_BYTES (2); > } > else if (XEXP (src, 1) == const1_rtx) > /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ > - igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6) > + igain = !speed_p ? -COSTS_N_BYTES (6) > : -COSTS_N_INSNS (1); > break; > > @@ -1756,16 +1766,24 @@ timode_scalar_chain::compute_convert_gain () > > if (igain != 0 && dump_file) > { > - fprintf (dump_file, " Instruction gain %d for ", igain); > + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ", > + igain, bb_freq.to_double ()); > dump_insn_slim (dump_file, insn); > } > + > gain += igain; > + if (speed_p) > + weighted_gain += bb_freq * igain; > } > > if (dump_file) > - fprintf (dump_file, " Total gain: %d\n", gain); > + fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n", > + gain, weighted_gain.to_double ()); > > - return gain; > + if (weighted_gain > (sreal) 0) > + return true; > + else > + return gain > 0; > } > > /* Fix uses of converted REG in debug insns. */ > @@ -2561,7 +2579,7 @@ convert_scalars_to_vector (bool timode_p) > conversions. */ > if (chain->build (&candidates[i], uid, disallowed)) > { > - if (chain->compute_convert_gain () > 0) > + if (chain->compute_convert_gain ()) > converted_insns += chain->convert (); > else if (dump_file) > fprintf (dump_file, "Chain #%d conversion is not > profitable\n", > diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h > index 24b0c4ed0cd..ea2c189fea8 100644 > --- a/gcc/config/i386/i386-features.h > +++ b/gcc/config/i386/i386-features.h > @@ -153,12 +153,13 @@ class scalar_chain > > bitmap insns_conv; > hash_map<rtx, rtx> defs_map; > - unsigned n_sse_to_integer; > - unsigned n_integer_to_sse; > + /* Cost of inserted conversion between ineteger and sse. */ > + int cost_sse_integer; > + sreal weighted_cost_sse_integer; > auto_vec<rtx_insn *> control_flow_insns; > > bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed); > - virtual int compute_convert_gain () = 0; > + virtual bool compute_convert_gain () = 0; > int convert (); > > protected: > @@ -184,7 +185,7 @@ class general_scalar_chain : public scalar_chain > public: > general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) > : scalar_chain (smode_, vmode_) {} > - int compute_convert_gain () final override; > + bool compute_convert_gain () final override; > > private: > void convert_insn (rtx_insn *insn) final override; > @@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain > { > public: > timode_scalar_chain () : scalar_chain (TImode, V1TImode) {} > - int compute_convert_gain () final override; > + bool compute_convert_gain () final override; > > private: > void fix_debug_reg_uses (rtx reg); > diff --git a/gcc/testsuite/gcc.target/i386/minmax-6.c > b/gcc/testsuite/gcc.target/i386/minmax-6.c > index 615f919ba0a..b5b4363f931 100644 > --- a/gcc/testsuite/gcc.target/i386/minmax-6.c > +++ b/gcc/testsuite/gcc.target/i386/minmax-6.c > @@ -15,4 +15,4 @@ UMVLine16Y_11 (short unsigned int * Pic, int y, int width) > /* We do not want the RA to spill %esi for it's dual-use but using > pmaxsd is OK. */ > /* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */ > -/* { dg-final { scan-assembler "pmaxsd" } } */ > +/* { dg-final { scan-assembler "pmaxsd" { target { ! ia32 } } } } */ > -- > 2.34.1 > -- BR, Hongtao