http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56127



             Bug #: 56127

           Summary: Incorrect code with -O2

    Classification: Unclassified

           Product: gcc

           Version: 4.6.3

            Status: UNCONFIRMED

          Severity: normal

          Priority: P3

         Component: c++

        AssignedTo: unassig...@gcc.gnu.org

        ReportedBy: trosenb...@gmail.com





Created attachment 29290

  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=29290

main.ii



It seems like GCC 4.6.3 generates incorrect assembly for ARM Cortex-A9 from the

following C++ code when invoked with -O1, -O2 or -O3.

The code just increments two variables 100 times and then divides one by the other.



Correct output is:

average =     1.000

PMTsum = 100  nValues = 100



but -O1 gives this:

average =     0.000

PMTsum = 100  nValues = 100



and -O2 and -O3 yield this:

average =       inf

PMTsum = 100  nValues = 100



This 3-file C++ project is the minimal configuration that shows this behavior. 

With everything in one file the problem vanishes.



Below the C++ code is the assembly language output, marked with *** where I

think the error may be (but I'm unfamiliar with assembly language).



Thanks to anyone who looks into this, and to everyone who has been developing

GCC!



//exp_results.h



// Pair of unsigned counters plus their ratio. This is the class exercised by
// the reproducer's main().
class exp_results

{

public:

// Starts with both counters at zero.
exp_results() :

    PMTsum(0),

    nValues(0)

{

}



// Only declared here; the definition lives in exp_results.cpp, i.e. in a
// different translation unit from main(). The report states the problem
// vanishes when everything is in one file.
unsigned increment();



// Returns PMTsum / nValues as a double, or 0 when nValues is zero, so the
// division is only ever performed with a non-zero divisor.
double get_average()

{

    if (nValues)

        return ((double)PMTsum) / ((double)nValues);

    else

        return 0;

}



// Numerator of get_average().
unsigned PMTsum;

// Denominator of get_average(); zero selects the `return 0` branch.
unsigned nValues;

};





//exp_results.cpp



#include "exp_results.h"





// Adds one to both PMTsum and nValues, then returns the constant 1.
unsigned exp_results::increment()

{

    PMTsum++;

    nValues++;



    return 1;

}



//main.cc



#include <stdio.h>

#include "exp_results.h"



// Reproducer driver: bumps both counters 100 times, then prints their ratio
// and the raw counters. With PMTsum == nValues == 100 the ratio is 1.0, which
// is the "correct output" quoted in the report.
int main()

{

    exp_results r;



    // 100 calls into the out-of-line increment().
    for(unsigned i=0; i< 100; i++)

        r.increment();



    // This is the line whose output is wrong in the report (0.000 at -O1,
    // inf at -O2/-O3); the counters printed by the next line are correct.
    printf("average = %9.3f\n", r.get_average());

    printf("PMTsum = %u  nValues = %u\n", r.PMTsum, r.nValues);

    return 0;

}





// main.s



    .cpu cortex-a9

    .eabi_attribute 27, 3

    .fpu neon-fp16

    .eabi_attribute 20, 1

    .eabi_attribute 21, 1

    .eabi_attribute 23, 3

    .eabi_attribute 24, 1

    .eabi_attribute 25, 1

    .eabi_attribute 26, 1

    .eabi_attribute 30, 2

    .eabi_attribute 34, 1

    .eabi_attribute 18, 4

    .file    "main.cc"

@ GNU C++ (Sourcery CodeBench Lite 2012.03-83) version 4.6.3 (arm-xilinx-eabi)

@    compiled by GNU C version 4.3.2, GMP version 4.3.2, MPFR version 3.0.1-p4,

@    MPC version 0.9

@ GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072

@ options passed:  -fpreprocessed main.ii -mcpu=cortex-a9

@ -mfloat-abi=softfp -mfpu=neon-fp16 -auxbase-strip src/main.o -O2 -Wall

@ -fmessage-length=0 -fverbose-asm -fremove-local-statics

@ options enabled:  -fauto-inc-dec -fbranch-count-reg -fcaller-saves

@ -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers

@ -fcrossjumping -fcse-follow-jumps -fdefer-pop

@ -fdelete-null-pointer-checks -fdevirtualize -fdwarf2-cfi-asm

@ -fearly-inlining -feliminate-unused-debug-types -fexceptions

@ -fexpensive-optimizations -fextension-elimination -fforward-propagate

@ -ffunction-cse -fgcse -fgcse-lm -fguess-branch-probability -fident

@ -fif-conversion -fif-conversion2 -findirect-inlining -finline

@ -finline-functions-called-once -finline-small-functions -fipa-cp

@ -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra

@ -fira-share-save-slots -fira-share-spill-slots -fivopts

@ -fkeep-static-consts -fleading-underscore -fmath-errno -fmerge-constants

@ -fmerge-debug-strings -fmove-loop-invariants -fomit-frame-pointer

@ -foptimize-register-move -foptimize-sibling-calls -fpartial-inlining

@ -fpeephole -fpeephole2 -fprefetch-loop-arrays -fpromote-loop-indices

@ -freg-struct-return -fregmove -frename-registers -freorder-blocks

@ -freorder-functions -frerun-cse-after-loop

@ -fsched-critical-path-heuristic -fsched-dep-count-heuristic

@ -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic

@ -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic

@ -fsched-stalled-insns-dep -fschedule-insns -fschedule-insns2

@ -fsection-anchors -fshow-column -fsigned-zeros -fsplit-ivs-in-unroller

@ -fsplit-wide-types -fstrict-aliasing -fstrict-overflow

@ -fstrict-volatile-bitfields -fthread-jumps -ftoplevel-reorder

@ -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp

@ -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim -ftree-dce

@ -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre

@ -ftree-if-to-switch-conversion -ftree-loop-if-convert -ftree-loop-im

@ -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=

@ -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop

@ -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion

@ -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time

@ -funroll-loops -fverbose-asm -fweb -fzero-initialized-in-bss

@ -mlittle-endian -msched-prolog -munaligned-access



@ Compiler executable checksum: af2616fad9f2abb21c14f2e52d2eaee7



    .section    .text.startup,"ax",%progbits

    .align    2

    .global    main

    .type    main, %function

@ -----------------------------------------------------------------------
@ int main() -- GCC 4.6.3 -O2 output for main.cc (ARM, -mfloat-abi=softfp).
@ This listing is the evidence for the bug report and is kept verbatim.
@ Stack frame: 8 bytes holding `r` (r.PMTsum at [sp, #0], r.nValues at
@ [sp, #4]). r4 is the loop counter; r4 and lr are saved on entry.
@ -----------------------------------------------------------------------
main:

    .fnstart

.LFB4:

    @ args = 0, pretend = 0, frame = 8

    @ frame_needed = 0, uses_anonymous_args = 0

    stmfd    sp!, {r4, lr}    @,

    .save {r4, lr}

    @ r4 = 100; it is decremented twice per pass of .L3 below.
    mov    r4, #100    @ ivtmp.3,

    .pad #8

    sub    sp, sp, #8    @,,

    @ Inlined constructor: both counters start at 0.
    mov    r3, #0    @ tmp144,

    str    r3, [sp, #0]    @ tmp144, r.PMTsum

    str    r3, [sp, #4]    @ tmp144, r.nValues

    @ Loop body calls increment() twice with r0 = &r and subtracts 2 from r4
    @ in total (sub + subs), so 50 passes perform the 100 calls.
.L3:

    mov    r0, sp    @,

    sub    r4, r4, #1    @ tmp156, ivtmp.3,

    bl    _ZN11exp_results9incrementEv    @

    mov    r0, sp    @,

    bl    _ZN11exp_results9incrementEv    @

    subs    r4, r4, #1    @ ivtmp.3, tmp156,

    bne    .L3    @,

    @ Inlined get_average(), if-converted: r3 = r.nValues, and the flags from
    @ the cmp below select the `ne`-suffixed instructions (nValues != 0).
    ldr    r3, [sp, #4]    @ D.6224, r.nValues

    @ r0 = address of the "average = ..." format string (.LC0).
    movw    r0, #:lower16:.LC0    @,

    movt    r0, #:upper16:.LC0    @,

    cmp    r3, #0    @ D.6224,

    @ If nValues != 0: d16 = (double)nValues  -- the intended divisor.
    fmsrne    s15, r3    @ int    @, D.6224

    fuitodne    d16, s15    @ tmp149,

    fldsne    s15, [sp, #0]    @ int    @, r.PMTsum

    @ This instruction has no condition suffix, so it runs on both paths and
    @ overwrites the d16 written by `fuitodne d16` above. On the nValues != 0
    @ path the fdivdne below then divides by 0, which is consistent with the
    @ "average = inf" output reported for -O2/-O3.
    @ NOTE(review): presumably this zero was meant only as the result for the
    @ nValues == 0 path (`return 0`); confirm against the GCC bug discussion.
    vmov.i32    d16, #0    @ D.6219

//*** load 0 into d16 (why? is this the bug?)  ***

    @ If nValues != 0: d17 = (double)PMTsum, d16 = d17 / d16.
    fuitodne    d17, s15    @ tmp147,

    fdivdne    d16, d17, d16    @ D.6219, tmp147, tmp149

    @ The double in d16 is moved to r2/r3 as the printf argument.
    fmrrd    r2, r3, d16    @, D.6219

    bl    printf    @

    @ Second printf: r0 = .LC1, r1 = r.PMTsum, r2 = r.nValues (loaded from
    @ the two words at sp).
    movw    r0, #:lower16:.LC1    @,

    ldmia    sp, {r1, r2}    @,,

    movt    r0, #:upper16:.LC1    @,

    bl    printf    @

    @ return 0: release the 8-byte frame, restore r4 and return via pc.
    mov    r0, #0    @,

    add    sp, sp, #8    @,,

    ldmfd    sp!, {r4, pc}

    .fnend

    .size    main, .-main

    .section    .rodata.str1.4,"aMS",%progbits,1

    .align    2

@ Format string for the first printf in main ("\012" is newline, "\000" the
@ terminating NUL).
.LC0:

    .ascii    "average = %9.3f\012\000"

    @ The string above is 17 bytes; 3 bytes of padding bring the offset of
    @ .LC1 to 20, a multiple of 4.
    .space    3

@ Format string for the second printf in main.
.LC1:

    .ascii    "PMTsum = %u  nValues = %u\012\000"

    .ident    "GCC: (Sourcery CodeBench Lite 2012.03-83) 4.6.3"





// exp_results.s



    .cpu cortex-a9

    .eabi_attribute 27, 3

    .fpu neon-fp16

    .eabi_attribute 20, 1

    .eabi_attribute 21, 1

    .eabi_attribute 23, 3

    .eabi_attribute 24, 1

    .eabi_attribute 25, 1

    .eabi_attribute 26, 1

    .eabi_attribute 30, 2

    .eabi_attribute 34, 1

    .eabi_attribute 18, 4

    .file    "exp_results.cpp"

@ GNU C++ (Sourcery CodeBench Lite 2012.03-83) version 4.6.3 (arm-xilinx-eabi)

@    compiled by GNU C version 4.3.2, GMP version 4.3.2, MPFR version 3.0.1-p4,

@    MPC version 0.9

@ GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072

@ options passed:  -fpreprocessed exp_results.ii -mcpu=cortex-a9

@ -mfloat-abi=softfp -mfpu=neon-fp16 -auxbase-strip src/exp_results.o -O2

@ -Wall -fmessage-length=0 -fverbose-asm -fremove-local-statics

@ options enabled:  -fauto-inc-dec -fbranch-count-reg -fcaller-saves

@ -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers

@ -fcrossjumping -fcse-follow-jumps -fdefer-pop

@ -fdelete-null-pointer-checks -fdevirtualize -fdwarf2-cfi-asm

@ -fearly-inlining -feliminate-unused-debug-types -fexceptions

@ -fexpensive-optimizations -fextension-elimination -fforward-propagate
@ -ffunction-cse -fgcse -fgcse-lm -fguess-branch-probability -fident

@ -fif-conversion -fif-conversion2 -findirect-inlining -finline

@ -finline-functions-called-once -finline-small-functions -fipa-cp

@ -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra

@ -fira-share-save-slots -fira-share-spill-slots -fivopts

@ -fkeep-static-consts -fleading-underscore -fmath-errno -fmerge-constants

@ -fmerge-debug-strings -fmove-loop-invariants -fomit-frame-pointer

@ -foptimize-register-move -foptimize-sibling-calls -fpartial-inlining

@ -fpeephole -fpeephole2 -fprefetch-loop-arrays -fpromote-loop-indices

@ -freg-struct-return -fregmove -frename-registers -freorder-blocks

@ -freorder-functions -frerun-cse-after-loop

@ -fsched-critical-path-heuristic -fsched-dep-count-heuristic

@ -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic

@ -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic

@ -fsched-stalled-insns-dep -fschedule-insns -fschedule-insns2

@ -fsection-anchors -fshow-column -fsigned-zeros -fsplit-ivs-in-unroller

@ -fsplit-wide-types -fstrict-aliasing -fstrict-overflow

@ -fstrict-volatile-bitfields -fthread-jumps -ftoplevel-reorder

@ -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp

@ -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim -ftree-dce

@ -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre

@ -ftree-if-to-switch-conversion -ftree-loop-if-convert -ftree-loop-im

@ -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=

@ -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop

@ -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion

@ -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time

@ -funroll-loops -fverbose-asm -fweb -fzero-initialized-in-bss

@ -mlittle-endian -msched-prolog -munaligned-access



@ Compiler executable checksum: af2616fad9f2abb21c14f2e52d2eaee7



    .text

    .align    2

    .global    _ZN11exp_results9incrementEv

    .type    _ZN11exp_results9incrementEv, %function

@ -----------------------------------------------------------------------
@ unsigned exp_results::increment() -- GCC 4.6.3 -O2 output, kept verbatim.
@ In:   r0 = this (PMTsum is the first word, nValues the second)
@ Out:  r0 = 1
@ Uses: r1, r2, r3 as scratch; no stack frame, lr is not saved.
@ -----------------------------------------------------------------------
_ZN11exp_results9incrementEv:

    .fnstart

.LFB4:

    @ args = 0, pretend = 0, frame = 0

    @ frame_needed = 0, uses_anonymous_args = 0

    @ link register save eliminated.

    @ Keep `this` in r3 so r0 can hold the constant 1, which serves both as
    @ the increment and as the return value.
    mov    r3, r0    @ this, this

    mov    r0, #1    @,

    @ Load both counters, add 1 to each, store both back.
    ldmia    r3, {r1, r2}    @ this,,

    add    r1, r1, r0    @ tmp141, this_1(D)->PMTsum,

    add    r2, r2, r0    @ tmp143, this_1(D)->nValues,

    stmia    r3, {r1, r2}    @ this,,

    bx    lr    @

    .cantunwind

    .fnend

    .size    _ZN11exp_results9incrementEv, .-_ZN11exp_results9incrementEv

    .ident    "GCC: (Sourcery CodeBench Lite 2012.03-83) 4.6.3"

Reply via email to