2014-06-25 17:50 GMT+02:00 Richard Henderson <[email protected]>:
> On 06/25/2014 08:28 AM, Jeff Law wrote:
>> Ask an ARM maintainer if the new code is actually better than the old code.
>
> It isn't.
>
>> It appears that with the peep2 pass moved that we actually if-convert the
>> fall-thru path of the conditional and eliminate the conditional. Which, on
>> the
>> surface seems like a good thing. It may be the case that we need to twiddle
>> the test. Not sure yet.
>
> Looking at the final code in the pr, it looks like we if-convert both ways,
> just with changed condition on the test.
>
> It looks like there are at least 3 peepholes in the arm backend that match
> compare+branch with a scratch register that are affected by this. I don't
> think it's reasonable to expect a peephole to match compare + n cond_exec
> insns, so running peep2 before if-conversion would seem to be called for.
>
> Kai, why does your indirect jump peephole require pass_reorder_blocks? It
> seems to me that instead of moving peephole2 down, we could move
> duplicate_computed_gotos up.
>
>
> r~
We need the peephole2 pass after reorder_blocks due we move in that
pass the jump
...
(code_label 54 52 55 5 8 "" [0 uses])
(note 55 54 56 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(jump_insn 56 55 57 5 (set (pc)
(reg/f:DI 0 ax [orig:83 D.3469 ] [83])) 638 {*indirect_jump}
(expr_list:REG_DEAD (reg/f:DI 0 ax [orig:83 D.3469 ] [83])
(nil)))
(barrier 57 56 58)
....
into each bb, so it can be resolved to an indirect jump on memory.
Testcase to reproduce that is:
// PR 51840/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
enum {
S_atop = 0,
S_atop_f = 1,
S_atop_t = 2,
S_limit = 3
};
enum {
I_a_dec = 0,
I_a_non_zero_p = 1,
I_jmp_if_true = 2,
I_exit = 3,
I_limit = 4
};
typedef struct {
uint64_t a0;
} vm_state_t;
__attribute__((noinline, noclone)) void exec_code(vm_state_t *state,
uint8_t *code) {
static void (* volatile const vm_dispatch[S_limit][I_limit]) = {
//dispatch for [S_atop = 0][..] with four unique destination labels
{&&atop__a_dec,
&&atop__a_non_zero_p,
&&fixme,
&&atop__exit},
//dispatch for [S_atop_f = 1][..] with two unique destination labels
{&&fixme,
&&fixme,
&&atop_f__jmp_if_true,
&&fixme},
//dispatch for [S_atop_t = 2][..] with two unique destination labels
{&&fixme,
&&fixme,
&&atop_t__jmp_if_true,
&&fixme}
};
printf("atop__a_dec: %p\n", &&atop__a_dec);
printf("atop__a_non_zero_p: %p\n", &&atop__a_non_zero_p);
printf("atop__exit: %p\n", &&atop__exit);
printf("atop_f__jmp_if_true: %p\n", &&atop_f__jmp_if_true);
printf("atop_t__jmp_if_true: %p\n", &&atop_t__jmp_if_true);
printf("fixme: %p\n", &&fixme);
volatile uint64_t atop = state->a0;
volatile int64_t inst_offset=0;
goto *(vm_dispatch[S_atop][code[inst_offset]]);
atop__a_dec: {
printf("atop = %ld\n", atop);
--atop;
volatile uint64_t next_inst = code[inst_offset + 1];
inst_offset += 1;
goto *(vm_dispatch[S_atop][next_inst]);
}
atop__a_non_zero_p: {
volatile uint64_t next_inst = code[inst_offset + 1];
inst_offset += 1;
if (atop != 0) {
goto *(vm_dispatch[S_atop_t][next_inst]);
} else {
goto *(vm_dispatch[S_atop_f][next_inst]);
}
}
atop_f__jmp_if_true: {
volatile uint64_t next_inst = code[inst_offset + 2];
inst_offset += 2;
goto *(vm_dispatch[S_atop][next_inst]);
}
atop_t__jmp_if_true: {
int64_t offset = (int8_t) code[inst_offset + 1];
uint64_t next_inst = code[inst_offset + offset];
inst_offset += offset;
goto *(vm_dispatch[S_atop][next_inst]);
}
atop__exit: {
state->a0 = atop;
return;
}
fixme: {
printf("Dispatch logic ERROR\n");
exit(EXIT_FAILURE);
}
}
int main(void)
{
vm_state_t state;
state.a0 = 10;
uint8_t code[] = {I_a_dec, //decrement top of stack
I_a_non_zero_p, //true if top of stack is non-zero
I_jmp_if_true, -2, //if true jump back to decrement
I_exit}; //otherwise exit
exec_code(&state, code);
return EXIT_SUCCESS;
}
Regards,
Kai