On 01/10/15 14:49, Jakub Jelinek wrote:
On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
this patch adds initialization in zero_iter_bb of counters introduced in
expand_omp_for_init_counts.

This removes the need to set TREE_NO_WARNING on those counters.

Why do you think it is a good idea?

In replace_ssa_name, I've recently added the assert:
...
          gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name));
...

On the gomp-4_0-branch, this assert triggers for a collapsed acc loop, which uses expand_omp_for_generic for omp-expansion. The assert triggers because (some of) the counters added by expand_omp_for_init_counts are not initialized on all paths.

On trunk, for the test-case in the patch, this assert doesn't trigger because the omp function is split off before ssa.

I'd be afraid it slows things down unnecessarily.

I think zero_iter_bb is a block that is expected not to be executed frequently.

I've attached an sdiff of x86_64 assembly for the test-case (before left, after right). AFAICT, this patch has the effect that it speeds up the frequent path with one instruction.

 Furthermore, I'd prefer not to change this area of code before
gomp-4_1-branch is merged, as it will be a nightmare for the merge
otherwise.

Committing to gomp-4_0-branch for now would work for me.

Thanks,
- Tom

        .file   "collapse.c"                    .file   "collapse.c"
        .text                                   .text
        .p2align 4,,15                          .p2align 4,,15
        .type   foo._omp_fn.0, @funct           .type   foo._omp_fn.0, @funct
foo._omp_fn.0:                          foo._omp_fn.0:
.LFB12:                                 .LFB12:
        .cfi_startproc                          .cfi_startproc
        pushq   %rbp                            pushq   %rbp
        .cfi_def_cfa_offset 16                  .cfi_def_cfa_offset 16
        .cfi_offset 6, -16                      .cfi_offset 6, -16
        pushq   %rbx                            pushq   %rbx
        .cfi_def_cfa_offset 24                  .cfi_def_cfa_offset 24
        .cfi_offset 3, -24                      .cfi_offset 3, -24
        xorl    %esi, %esi            <
        subq    $24, %rsp                       subq    $24, %rsp
        .cfi_def_cfa_offset 48                  .cfi_def_cfa_offset 48
        movl    (%rdi), %eax                    movl    (%rdi), %eax
        movl    4(%rdi), %ebp                   movl    4(%rdi), %ebp
        testl   %eax, %eax                      testl   %eax, %eax
        jle     .L8                   |         jle     .L9
        testl   %ebp, %ebp                      testl   %ebp, %ebp
        jle     .L8                   |         jle     .L9
        movslq  %ebp, %rbx                      movslq  %ebp, %rbx
        movslq  %eax, %rsi                      movslq  %eax, %rsi
        imulq   %rbx, %rsi                      imulq   %rbx, %rsi
.L8:                                    .L8:
        leaq    8(%rsp), %r8                    leaq    8(%rsp), %r8
        xorl    %edi, %edi                      xorl    %edi, %edi
        movq    %rsp, %rcx                      movq    %rsp, %rcx
        movl    $1, %edx                        movl    $1, %edx
        call    GOMP_loop_runtime_sta           call    GOMP_loop_runtime_sta
        testb   %al, %al                        testb   %al, %al
        jne     .L10                            jne     .L10
.L6:                                    .L6:
        call    GOMP_loop_end_nowait            call    GOMP_loop_end_nowait
        addq    $24, %rsp                       addq    $24, %rsp
        .cfi_remember_state                     .cfi_remember_state
        .cfi_def_cfa_offset 24                  .cfi_def_cfa_offset 24
        popq    %rbx                            popq    %rbx
        .cfi_def_cfa_offset 16                  .cfi_def_cfa_offset 16
        popq    %rbp                            popq    %rbp
        .cfi_def_cfa_offset 8                   .cfi_def_cfa_offset 8
        ret                                     ret
        .p2align 4,,10                          .p2align 4,,10
        .p2align 3                              .p2align 3
.L15:                                   .L15:
        .cfi_restore_state                      .cfi_restore_state
        leaq    8(%rsp), %rsi                   leaq    8(%rsp), %rsi
        movq    %rsp, %rdi                      movq    %rsp, %rdi
        call    GOMP_loop_runtime_nex           call    GOMP_loop_runtime_nex
        testb   %al, %al                        testb   %al, %al
        je      .L6                             je      .L6
.L10:                                   .L10:
        movq    (%rsp), %rsi                    movq    (%rsp), %rsi
        movq    8(%rsp), %r9                    movq    8(%rsp), %r9
        movq    %rsi, %rax                      movq    %rsi, %rax
        cqto                                    cqto
        idivq   %rbx                            idivq   %rbx
        movslq  %eax, %r8                       movslq  %eax, %r8
        .p2align 4,,10                          .p2align 4,,10
        .p2align 3                              .p2align 3
.L4:                                    .L4:
        leaq    (%r8,%r8,4), %rdi               leaq    (%r8,%r8,4), %rdi
        movslq  %edx, %rcx                      movslq  %edx, %rcx
        addq    $1, %rsi                        addq    $1, %rsi
        cmpq    %rsi, %r9                       cmpq    %rsi, %r9
        leaq    (%rdi,%rdi,4), %rdi             leaq    (%rdi,%rdi,4), %rdi
        leaq    (%rcx,%rdi,4), %rcx             leaq    (%rcx,%rdi,4), %rcx
        movl    $1, a(,%rcx,4)                  movl    $1, a(,%rcx,4)
        jle     .L15                            jle     .L15
        addl    $1, %edx                        addl    $1, %edx
        cmpl    %edx, %ebp                      cmpl    %edx, %ebp
        jg      .L4                             jg      .L4
        addl    $1, %eax                        addl    $1, %eax
        xorl    %edx, %edx                      xorl    %edx, %edx
        movslq  %eax, %r8                       movslq  %eax, %r8
        jmp     .L4                             jmp     .L4
                                      > .L9:
                                      >         xorl    %ebx, %ebx
                                      >         xorl    %esi, %esi
                                      >         jmp     .L8
        .cfi_endproc                            .cfi_endproc
.LFE12:                                 .LFE12:
        .size   foo._omp_fn.0, .-foo.           .size   foo._omp_fn.0, .-foo.
        .p2align 4,,15                          .p2align 4,,15
        .globl  foo                             .globl  foo
        .type   foo, @function                  .type   foo, @function
foo:                                    foo:
.LFB10:                                 .LFB10:
        .cfi_startproc                          .cfi_startproc
        subq    $24, %rsp                       subq    $24, %rsp
        .cfi_def_cfa_offset 32                  .cfi_def_cfa_offset 32
        xorl    %ecx, %ecx                      xorl    %ecx, %ecx
        xorl    %edx, %edx                      xorl    %edx, %edx
        movl    %edi, (%rsp)                    movl    %edi, (%rsp)
        movl    %esi, 4(%rsp)                   movl    %esi, 4(%rsp)
        movl    $foo._omp_fn.0, %edi            movl    $foo._omp_fn.0, %edi
        movq    %rsp, %rsi                      movq    %rsp, %rsi
        call    GOMP_parallel                   call    GOMP_parallel
        addq    $24, %rsp                       addq    $24, %rsp
        .cfi_def_cfa_offset 8                   .cfi_def_cfa_offset 8
        ret                                     ret
        .cfi_endproc                            .cfi_endproc
.LFE10:                                 .LFE10:
        .size   foo, .-foo                      .size   foo, .-foo

Reply via email to