A few more shift-by-constant improvements on the H8.
For H8/300H SImode arithmetic right shift by 15 bits, we'd conceptually like to use the shift-by-16 idiom where we move half-words around. Of course, that loses a bit. But we can save that bit away in C (the carry flag), shift by 16, sign-extend, then rotate through the carry one bit the other way. That saves nearly 100 cycles and is the same size. We were already using this concept elsewhere.
For H8/300H SImode arithmetic right shift by 28, 29 or 30 bits we use the shift-by-16 + shift-by-8 sequences, sign-extend, then handle the residuals inline. This saves on the order of 200 cycles vs a loop. The sequences are larger, but we're still talking about burning at most 10 bytes of instruction space to save 200 cycles. Obviously with -Os we revert to the loop sequence.
For H8/S HImode logical shift by 12 (right or left) we switch from a sequence of shift-by-8 idiom, clear bits, shift-by-2, shift-by-2 to a sequence of two rotate-by-2 instructions plus masking, which is the same size/speed on the hardware, but should simulate faster as it's one less instruction.
For H8/S SImode arithmetic right shift by 15 bits, we use the same trick as on the H8/300H.
For H8/S SImode logical shifts by 27 bits we can shave off 2 more bytes and 2 cycles by using a rotation-based sequence instead of a shift-by-16, shift-by-8, shift-by-2, shift-by-1 style sequence.
This has gone through the usual simulator testing without regressions. Jeff
commit 5f80c6270de6ac79d819de50048b32351a6b97c3 Author: Jeff Law <jlaw@localhost.localdomain> Date: Fri Aug 20 11:19:05 2021 -0400 Further improvements to constant shifts for the H8 gcc/ * config/h8300/h8300.c (shift_alg_hi): Improve arithmetic shift right by 15 bits for H8/300H and H8/S. Improve logical shifts by 12 bits for H8/S. (shift_alg_si): Improve arithmetic right shift by 28-30 bits for H8/300H. Improve arithmetic shift right by 15 bits for H8/S. Improve logical shifts by 27 bits for H8/S. (get_shift_alg): Corresponding changes. (h8300_option_override): Revert to loops for -Os when profitable. diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c index 0c4e5089791..8ccacecba79 100644 --- a/gcc/config/h8300/h8300.c +++ b/gcc/config/h8300/h8300.c @@ -213,9 +213,9 @@ static enum shift_alg shift_alg_hi[2][3][16] = { /* 0 1 2 3 4 5 6 7 */ /* 8 9 10 11 12 13 14 15 */ { INL, INL, INL, INL, INL, INL, INL, INL, - SPC, SPC, SPC, SPC, SPC, ROT, ROT, ROT }, /* SHIFT_ASHIFT */ + SPC, SPC, SPC, SPC, ROT, ROT, ROT, ROT }, /* SHIFT_ASHIFT */ { INL, INL, INL, INL, INL, INL, INL, INL, - SPC, SPC, SPC, SPC, SPC, ROT, ROT, ROT }, /* SHIFT_LSHIFTRT */ + SPC, SPC, SPC, SPC, ROT, ROT, ROT, ROT }, /* SHIFT_LSHIFTRT */ { INL, INL, INL, INL, INL, INL, INL, INL, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFTRT */ } @@ -237,9 +237,9 @@ static enum shift_alg shift_alg_si[2][3][32] = { SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_LSHIFTRT */ { INL, INL, INL, INL, INL, INL, INL, LOP, - SPC, LOP, LOP, LOP, LOP, LOP, LOP, LOP, + SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, - SPC, SPC, SPC, SPC, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFTRT */ + SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFTRT */ }, { /* TARGET_H8300S */ @@ -256,7 +256,7 @@ static enum shift_alg shift_alg_si[2][3][32] = { SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* 
SHIFT_LSHIFTRT */ { INL, INL, INL, INL, INL, INL, INL, INL, - INL, INL, INL, INL, INL, INL, INL, LOP, + INL, INL, INL, INL, INL, INL, INL, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFTRT */ } @@ -372,6 +372,9 @@ h8300_option_override (void) shift_alg_si[H8_300H][SHIFT_ASHIFTRT][25] = SHIFT_LOOP; shift_alg_si[H8_300H][SHIFT_ASHIFTRT][26] = SHIFT_LOOP; shift_alg_si[H8_300H][SHIFT_ASHIFTRT][27] = SHIFT_LOOP; + shift_alg_si[H8_300H][SHIFT_ASHIFTRT][28] = SHIFT_LOOP; + shift_alg_si[H8_300H][SHIFT_ASHIFTRT][29] = SHIFT_LOOP; + shift_alg_si[H8_300H][SHIFT_ASHIFTRT][30] = SHIFT_LOOP; /* H8S */ shift_alg_hi[H8_S][SHIFT_ASHIFTRT][14] = SHIFT_LOOP; @@ -3830,6 +3833,10 @@ get_shift_alg (enum shift_type shift_type, enum shift_mode shift_mode, } else if (count == 15) { + /* The basic idea here is to use the shift-by-16 idiom to make things + small and efficient. Of course, that loses one bit that we need, + so we stuff the bit into C, shift by 16, then rotate the bit + back in. 
*/ switch (shift_type) { case SHIFT_ASHIFT: @@ -3841,7 +3848,9 @@ get_shift_alg (enum shift_type shift_type, enum shift_mode shift_mode, info->cc_special = OLD_CC_SET_ZNV; goto end; case SHIFT_ASHIFTRT: - gcc_unreachable (); + info->special = "shll.w\t%f0\n\tmov.w\t%e0,%f0\n\texts.l\t%S0\n\trotxl.l\t%S0"; + info->cc_special = OLD_CC_SET_ZNV; + goto end; } } else if (count >= 16 && count <= 23) @@ -3863,6 +3872,23 @@ get_shift_alg (enum shift_type shift_type, enum shift_mode shift_mode, goto end; } } + else if (TARGET_H8300S && count == 27) + { + switch (shift_type) + { + case SHIFT_ASHIFT: + info->special = "sub.w\t%e0,%e0\n\trotr.l\t#2,%S0\n\trotr.l\t#2,%S0\n\trotr.l\t%S0\n\tsub.w\t%f0,%f0"; + goto end; + case SHIFT_LSHIFTRT: + info->special = "sub.w\t%f0,%f0\n\trotl.l\t#2,%S0\n\trotl.l\t#2,%S0\n\trotl.l\t%S0\n\textu.l\t%S0"; + goto end; + case SHIFT_ASHIFTRT: + info->remainder = count - 24; + info->special = "mov.w\t%e0,%f0\n\tmov.b\t%t0,%s0\n\texts.w\t%f0\n\texts.l\t%S0"; + info->cc_special = OLD_CC_SET_ZNV; + goto end; + } + } else if (count >= 24 && count <= 27) { info->remainder = count - 24;