On 01/10/15 14:49, Jakub Jelinek wrote:
> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
>> this patch adds initialization in zero_iter_bb of counters introduced in
>> expand_omp_for_init_counts.
>> This removes the need to set TREE_NO_WARNING on those counters.
>
> Why do you think it is a good idea?
In replace_ssa_name, I've recently added the assert:
...
gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name));
...
On the gomp-4_0-branch, this assert triggers for a collapsed acc loop,
which uses expand_omp_for_generic for omp expansion.  It triggers
because (some of) the counters added by expand_omp_for_init_counts are
not initialized on all paths.
On trunk, for the test-case in the patch, this assert doesn't trigger
because the omp function is split off before ssa.
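
To make that concrete: the sort of initialization the patch adds in
zero_iter_bb looks roughly like this (just a sketch, assuming the usual
counts[] / fd->collapse / first_zero_iter names from
expand_omp_for_init_counts; the exact iterator placement in the patch
may differ):
...
  if (zero_iter_bb)
    {
      /* Give the counters a defined (zero) value on the zero-iterations
         path, instead of marking them with TREE_NO_WARNING.  */
      gimple_stmt_iterator gsi = gsi_after_labels (zero_iter_bb);
      for (i = first_zero_iter; i < fd->collapse; i++)
        if (SSA_VAR_P (counts[i]))
          {
            gassign *assign_stmt
              = gimple_build_assign (counts[i],
                                     build_zero_cst (TREE_TYPE (counts[i])));
            gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
          }
    }
...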
> I'd be afraid it slows things down unnecessarily.
I think zero_iter_bb is a block that is expected not to be executed
frequently.
I've attached an sdiff of the x86_64 assembly for the test-case (before
on the left, after on the right).  AFAICT, the patch shortens the
frequent path by one instruction: the unconditional xorl %esi, %esi at
function entry is gone, and the new out-of-line block .L9 (reached only
when one of the loop bounds is non-positive) now zeroes both %esi and
%ebx instead.
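
For reference, the test-case is of roughly this shape (reconstructed
from the assembly below, so the array bound and the schedule(runtime)
clause are my guess rather than a copy of the actual file):
...
int a[100][100];

void
foo (int n, int m)
{
  int i, j;

#pragma omp parallel for collapse(2) schedule(runtime)
  for (i = 0; i < n; i++)
    for (j = 0; j < m; j++)
      a[i][j] = 1;
}
...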
> Furthermore, I'd prefer not to change this area of code before
> gomp-4_1-branch is merged, as it will be a nightmare for the merge
> otherwise.
Committing to gomp-4_0-branch for now would work for me.
Thanks,
- Tom
.file "collapse.c" .file "collapse.c"
.text .text
.p2align 4,,15 .p2align 4,,15
.type foo._omp_fn.0, @funct .type foo._omp_fn.0, @funct
foo._omp_fn.0: foo._omp_fn.0:
.LFB12: .LFB12:
.cfi_startproc .cfi_startproc
pushq %rbp pushq %rbp
.cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16
.cfi_offset 6, -16 .cfi_offset 6, -16
pushq %rbx pushq %rbx
.cfi_def_cfa_offset 24 .cfi_def_cfa_offset 24
.cfi_offset 3, -24 .cfi_offset 3, -24
xorl %esi, %esi <
subq $24, %rsp subq $24, %rsp
.cfi_def_cfa_offset 48 .cfi_def_cfa_offset 48
movl (%rdi), %eax movl (%rdi), %eax
movl 4(%rdi), %ebp movl 4(%rdi), %ebp
testl %eax, %eax testl %eax, %eax
jle .L8 | jle .L9
testl %ebp, %ebp testl %ebp, %ebp
jle .L8 | jle .L9
movslq %ebp, %rbx movslq %ebp, %rbx
movslq %eax, %rsi movslq %eax, %rsi
imulq %rbx, %rsi imulq %rbx, %rsi
.L8: .L8:
leaq 8(%rsp), %r8 leaq 8(%rsp), %r8
xorl %edi, %edi xorl %edi, %edi
movq %rsp, %rcx movq %rsp, %rcx
movl $1, %edx movl $1, %edx
call GOMP_loop_runtime_sta call GOMP_loop_runtime_sta
testb %al, %al testb %al, %al
jne .L10 jne .L10
.L6: .L6:
call GOMP_loop_end_nowait call GOMP_loop_end_nowait
addq $24, %rsp addq $24, %rsp
.cfi_remember_state .cfi_remember_state
.cfi_def_cfa_offset 24 .cfi_def_cfa_offset 24
popq %rbx popq %rbx
.cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16
popq %rbp popq %rbp
.cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8
ret ret
.p2align 4,,10 .p2align 4,,10
.p2align 3 .p2align 3
.L15: .L15:
.cfi_restore_state .cfi_restore_state
leaq 8(%rsp), %rsi leaq 8(%rsp), %rsi
movq %rsp, %rdi movq %rsp, %rdi
call GOMP_loop_runtime_nex call GOMP_loop_runtime_nex
testb %al, %al testb %al, %al
je .L6 je .L6
.L10: .L10:
movq (%rsp), %rsi movq (%rsp), %rsi
movq 8(%rsp), %r9 movq 8(%rsp), %r9
movq %rsi, %rax movq %rsi, %rax
cqto cqto
idivq %rbx idivq %rbx
movslq %eax, %r8 movslq %eax, %r8
.p2align 4,,10 .p2align 4,,10
.p2align 3 .p2align 3
.L4: .L4:
leaq (%r8,%r8,4), %rdi leaq (%r8,%r8,4), %rdi
movslq %edx, %rcx movslq %edx, %rcx
addq $1, %rsi addq $1, %rsi
cmpq %rsi, %r9 cmpq %rsi, %r9
leaq (%rdi,%rdi,4), %rdi leaq (%rdi,%rdi,4), %rdi
leaq (%rcx,%rdi,4), %rcx leaq (%rcx,%rdi,4), %rcx
movl $1, a(,%rcx,4) movl $1, a(,%rcx,4)
jle .L15 jle .L15
addl $1, %edx addl $1, %edx
cmpl %edx, %ebp cmpl %edx, %ebp
jg .L4 jg .L4
addl $1, %eax addl $1, %eax
xorl %edx, %edx xorl %edx, %edx
movslq %eax, %r8 movslq %eax, %r8
jmp .L4 jmp .L4
> .L9:
> xorl %ebx, %ebx
> xorl %esi, %esi
> jmp .L8
.cfi_endproc .cfi_endproc
.LFE12: .LFE12:
.size foo._omp_fn.0, .-foo. .size foo._omp_fn.0, .-foo.
.p2align 4,,15 .p2align 4,,15
.globl foo .globl foo
.type foo, @function .type foo, @function
foo: foo:
.LFB10: .LFB10:
.cfi_startproc .cfi_startproc
subq $24, %rsp subq $24, %rsp
.cfi_def_cfa_offset 32 .cfi_def_cfa_offset 32
xorl %ecx, %ecx xorl %ecx, %ecx
xorl %edx, %edx xorl %edx, %edx
movl %edi, (%rsp) movl %edi, (%rsp)
movl %esi, 4(%rsp) movl %esi, 4(%rsp)
movl $foo._omp_fn.0, %edi movl $foo._omp_fn.0, %edi
movq %rsp, %rsi movq %rsp, %rsi
call GOMP_parallel call GOMP_parallel
addq $24, %rsp addq $24, %rsp
.cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8
ret ret
.cfi_endproc .cfi_endproc
.LFE10: .LFE10:
.size foo, .-foo .size foo, .-foo