I have a short piece of code that I am using for tuning an application.

  "-freduce-all-givs"

makes it run faster with some data types and slower with others.  The
info page said you were interested in such results.  I know very little
about any of this.

====================

The two conditions I am comparing that may be of interest to you are 

(a) KITCHEN=-O3 -fexpensive-optimizations -ffast-math -fstrength-reduce  
-frerun-cse-after-loop -frerun-loop-opt -fschedule-insns2  
-fprefetch-loop-arrays -falign-loops -frename-registers

(b) GIVS=-freduce-all-givs $(KITCHEN)

Time in                 work units      datatype - optimization . out 
seconds                 per second

 21.737                 (459964)        long-kitchensink.out
 21.749                 (459714)        int-kitchensink.out
 23.081                 (433183)        long-givs.out
 24.445                 (409002)        short-givs.out
 27.116                 (368715)        int-givs.out
 33.834                 (295506)        double-kitchensink.out
 34.500                 (289802)        double-givs.out
 35.221                 (283869)        short-kitchensink.out
 39.376                 (253916)        float-kitchensink.out
 45.068                 (221846)        float-givs.out
 51.890                 (192681)        int.10000000.out
 51.917                 (192580)        int--fallow-single-precision.10000000.out
 51.919                 (192575)        int--O3.10000000.out
 62.984                 (158742)        long--fallow-single-precision.10000000.out
 63.013                 (158669)        long.10000000.out
 63.018                 (158656)        long--O3.10000000.out
 65.054                 (153691)        short--fallow-single-precision.10000000.out
 65.055                 (153690)        short--O3.10000000.out
 65.073                 (153646)        short.10000000.out
 70.367                 (142087)        double--O3.10000000.out
 70.384                 (142053)        double--fallow-single-precision.10000000.out
 70.403                 (142014)        double.10000000.out
 72.452                 (137997)        float--O3.10000000.out
 72.490                 (137926)        float.10000000.out
 72.491                 (137924)        float--fallow-single-precision.10000000.out




====================

#include <limits.h>
#include <math.h>
#include <stdio.h>

/* Element type of the benchmark arrays.  The build rules pass
   -D NUMBER=<type> on the compiler command line; only fall back to float
   when nothing was supplied, so that choice is neither redefined nor
   overridden here. */
#ifndef NUMBER
#define NUMBER float
#endif
/* Number of elements in each of the three arrays. */
#define DIM 540
/* Not referenced by the code in this file. */
#define INIT 5.0
/* Default amount of work when no count is given on the command line. */
#define FINGERCOUNT  200000000


/* typedef NUMBER int; */

/* Operand arrays read by the inner loop of main(). */
NUMBER aaa[DIM];
NUMBER bbb[DIM];
NUMBER kkk[DIM];

/* Running sum written by the inner loop of main(). */
NUMBER total = 0;

/*
 * Benchmark driver.
 *
 * Usage: prog [fingers]
 *
 * "fingers" is the requested number of passes over the arrays.  It is
 * rounded down to a perfect square (fingersRT * fingersRT) and the
 * DIM-element kernel is then run once per pass inside two nested loops of
 * fingersRT iterations each.  Without exactly one argument, or when the
 * argument cannot be parsed or is out of range, FINGERCOUNT is used.
 *
 * Writes the echoed argument and the sscanf result to stderr, the
 * effective counts to stdout, and returns 0.
 */
int main(int argc, char* argv[]){

  int jj;
  int j;
  int v;
  int tmp;
  double fingers;
  double root;
  int fingersRT;

  fingers = FINGERCOUNT;

  /* argv[1] is a null pointer when no argument was given, and passing a
     null pointer to %s is undefined; echo an empty string instead. */
  fprintf(stderr,"<%s>\n", (argc > 1) ? argv[1] : "");
  if (argc == 2){
    int rc;
    rc = sscanf(argv[1],"%lf", &fingers);
    fprintf(stderr,"rc=%d\n", rc);
    if (rc != 1){
      /* Nothing was converted: make the fallback to the default explicit. */
      fingers = FINGERCOUNT;
    }
  }

  /* Converting a NaN (negative input) or a value above INT_MAX to int is
     undefined, so reject those before the conversion. */
  root = sqrt(fingers);
  if (!(root >= 0.0) || root > (double)INT_MAX){
    fprintf(stderr,"fingers out of range, using default %.0f\n",
            (double)FINGERCOUNT);
    root = sqrt((double)FINGERCOUNT);
  }

  fingersRT = (int)root;
  fingers = (double)fingersRT * fingersRT;
  printf("  fingersRT:%d fingers = %20.0f\n",   fingersRT, fingers);

  /* Every element gets kkk = 3, bbb = 4, aaa = 5. */
  for(v=0; v < DIM; v++){
    aaa[v] = 1 + (bbb[v] = (1 + (kkk[v] = 3)));
  }

  /* Timed kernel: fingersRT * fingersRT passes over the arrays. */
  for(jj=0;jj<fingersRT;jj++){
    for(j=0;j<fingersRT;j++){

      for(v=0; v < DIM; v++){

        /* tmp is an int, so the difference is converted to int before it
           is squared, whatever NUMBER is. */
        tmp = (aaa[v] - bbb[v]);
        total += kkk[v] * tmp * tmp;

      }

    }
  }

  /* total is accumulated above but is not printed by this function. */
  return (0);
}


====================
File Edit Options Buffers Tools Makefile Help
# Element type under test; passed to the compiler as -D NUMBER=$(NT).
# Uncomment exactly one.
#NT=double
NT=float
#NT=int
#NT=long
#NT=short
# Single extra option for the "$(NT)-$(OPTIMIZE)" targets (none selected).
#OPTIMIZE=-fexpensive-optimizations
#OPTIMIZE=-ffast-math
#OPTIMIZE=-fallow-single-precision
# Work count handed to the program as its only argument.
COUNT=10000000
#OPTIMIZE=-O3
#OPTIMIZE=-O
# The flag list spans several lines, so each line but the last needs a
# trailing backslash; otherwise make rejects the continuation lines.
KITCHEN=-O3 -fexpensive-optimizations -ffast-math -fstrength-reduce \
        -frerun-cse-after-loop -frerun-loop-opt -fschedule-insns2 \
        -fprefetch-loop-arrays -falign-loops -frename-registers
# Same as KITCHEN plus the option being evaluated.
GIVS=-freduce-all-givs $(KITCHEN)

#run: "$(NT)"


# Run the GIVS build, bracketed by two timestamps, and keep a copy of the
# output in the target file.
"$(NT)-givs.out": "$(NT)-givs"
	{ date +"%s%t%N"; ./$< $(COUNT); date +"%s%t%N"; } | tee $@




# Run the baseline build, bracketed by two timestamps, and keep a copy of
# the output in the target file.
"$(NT).$(COUNT).out": "$(NT)"
	{ date +"%s%t%N"; ./$< $(COUNT); date +"%s%t%N"; } | tee $@



# Run the build made with $(OPTIMIZE).  The prerequisite must be the
# "$(NT)-$(OPTIMIZE)" binary: depending on the plain "$(NT)" build would
# time the unoptimized program under an optimized-looking file name.
"$(NT)-$(OPTIMIZE).$(COUNT).out": "$(NT)-$(OPTIMIZE)"
	(date +"%s%t%N"; ./$< $(COUNT); date +"%s%t%N") | tee $@



# Run the KITCHEN build, bracketed by two timestamps, and keep a copy of
# the output in the target file.
"$(NT)-kitchensink.out": "$(NT)-kitchensink"
	{ date +"%s%t%N"; ./$< $(COUNT); date +"%s%t%N"; } | tee $@




# Baseline build with no optimization flags.  -lm comes after the source
# file because the linker resolves libraries in command-line order.
"$(NT)": main.c
	gcc -Wall -D NUMBER=$(NT) $< -o $@ -lm


# Build with the single option in $(OPTIMIZE).  main.c calls sqrt(), so
# link the math library after the source file.
"$(NT)-$(OPTIMIZE)": main.c
	gcc -D NUMBER=$(NT) $(OPTIMIZE) $< -o $@ -lm


# Build with the full KITCHEN flag set.  main.c calls sqrt(), so link the
# math library after the source file.
"$(NT)-kitchensink": main.c
	gcc -D NUMBER=$(NT) $(KITCHEN) $< -o $@ -lm

# Build with KITCHEN plus -freduce-all-givs.  main.c calls sqrt(), so link
# the math library after the source file.
"$(NT)-givs": main.c
	gcc -D NUMBER=$(NT) $(GIVS) $< -o $@ -lm


======================================


 Capability LSM initialized as secondary
 Mount-cache hash table entries: 256 (order: 0, 4096 bytes)
 CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
 CPU: L2 Cache: 1024K (64 bytes/line)
 CPU 0(2) -> Node 0
 CPU0: Physical Processor ID: 0
 CPU0: Processor Core ID: 0
 CPU0: Initial APIC ID: 0
 Using local APIC NMI watchdog using perfctr0
 CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
 CPU: L2 Cache: 1024K (64 bytes/line)
 CPU 0(2) -> Node 0
 CPU0: Physical Processor ID: 0
 CPU0: Processor Core ID: 0
 CPU0: Initial APIC ID: 0
 CPU0: Dual Core AMD Opteron(tm) Processor 870 HE stepping 02
 per-CPU timeslice cutoff: 1023.93 usecs.
 task migration cache decay timeout: 2 msecs.
 Booting processor 1/1 rip 6000 rsp 1068d0d3f58

======================================

uname -a
Linux acc-1 2.6.9-22.EPsmp #2 SMP Thu Feb 9 15:22:50 CST 2006 x86_64 x86_64 x86_64 GNU/Linux

======================================



/Russell 

Reply via email to