> Could it be that your CPU has a single floating-point unit shared by 4
> cores on a single die, and thus only 2 floating-point units total for
> all 8 of your cores? If so, then that fact, plus the fact that each
> core has its own separate ALU for integer operations, would seem to
> explain the results you are seeing.
Exactly, this would explain the behaviour. But unfortunately it is not
the case. I implemented a small example in Java (Java threads) and
C (pthreads), and in both cases I get a linear speedup. See the code
attached below. The cores share only a 12 MB cache, but this should be
more than enough for my micro-benchmark. Having seen the linear speedup
in Java and C, I would rule out a hardware limitation.
_
Johann
### C ###
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* Thread count used by this benchmark. */
#define NUM_THREADS 8
/* Forward declarations of the integer and floating-point increment kernels. */
int inc(int);
double inc_d(int);
/*
 * Integer benchmark kernel.
 *
 * Returns x + 1 computed with plain int arithmetic.
 */
int inc(int x){
    return x + 1;
}
/*
 * Floating-point benchmark kernel.
 *
 * Converts x to double and returns that value plus 1.0.
 */
double inc_d(int x){
    const double next = 1.0 + (double)x;
    return next;
}
/*
 * Thread entry point for the integer workload.
 *
 * t carries the thread's numeric id packed into a pointer. The function
 * announces itself, calls inc() one billion times, prints the value of the
 * last call, and terminates the thread via pthread_exit(), handing t back
 * as the exit status.
 */
void *BusyWork(void *t){
    const long id = (long)t;
    int last = 0;
    int n;

    printf("Thread %ld starting...\n", id);

    /* Alternative workload kept for reference: result + sin(i) * tan(i) */
    for (n = 0; n != 1000000000; ++n) {
        last = inc(n);
    }

    printf("Thread %ld done. Result = %i\n", id, last);
    pthread_exit(t);
}
/*
 * Thread entry point for the floating-point workload.
 *
 * t carries the thread's numeric id packed into a pointer. The function
 * announces itself, calls inc_d() one billion times, prints the value of
 * the last call, and terminates the thread via pthread_exit(), handing t
 * back as the exit status.
 */
void *BusyWork_d(void *t){
    const long id = (long)t;
    double last = 0.0;
    int n;

    printf("Thread %ld starting...\n", id);

    /* Alternative workload kept for reference: result + sin(i) * tan(i) */
    for (n = 0; n != 1000000000; ++n) {
        last = inc_d(n);
    }

    printf("Thread %ld done. Result = %e\n", id, last);
    pthread_exit(t);
}
/*
 * Serial (non-threaded) variant of the floating-point workload.
 *
 * Calls inc_d() one billion times on the calling thread and prints nothing.
 *
 * Returns NULL always; the void * return type is kept only so existing
 * callers continue to compile.
 */
void *BusyWork_single(void){
    int i;
    double result=0.0;
    volatile double sink;

    for (i=0; i<1000000000; i++){
        result = inc_d(i);
    }

    /*
     * Store the final value through a volatile so the result is observed;
     * otherwise an optimizing compiler is free to discard the whole loop and
     * the serial timing would measure nothing.
     */
    sink = result;
    (void)sink;

    return NULL;
}
/*
 * Benchmark driver.
 *
 * First runs BusyWork_single() NUM_THREADS times back to back and reports
 * the elapsed wall-clock time, then starts NUM_THREADS joinable threads
 * running BusyWork_d(), joins them all, and reports the elapsed time for
 * the parallel run. Timing uses time(), so the resolution is whole seconds.
 *
 * Exits with status -1 if a thread cannot be created or joined.
 */
int main (int argc, char *argv[]){
    time_t start,end;
    double dif;
    pthread_t thread[NUM_THREADS];
    pthread_attr_t attr;
    int rc;
    long t;
    void *status;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    start = time(NULL);
    /* Running serial code */
    for(t=0; t<NUM_THREADS; t++){
        printf("Running serial code #: %ld\n", t);
        BusyWork_single();
    }
    end = time(NULL);
    dif = difftime(end,start);
    printf("Runtime for serial code: %f\n", dif);

    start = time(NULL);
    /* Initialize the attribute object and request joinable threads */
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    for(t=0; t<NUM_THREADS; t++){
        printf("Main: creating thread %ld\n", t);
        /* Let's rock */
        /* Integer variant:
           rc = pthread_create(&thread[t], &attr, BusyWork, (void *)t); */
        rc = pthread_create(&thread[t], &attr, BusyWork_d, (void *)t);
        if (rc) {
            printf("ERROR; return code from pthread_create() is %d\n", rc);
            exit(-1);
        }
    }

    /* Free attribute and wait for the other threads */
    pthread_attr_destroy(&attr);
    for(t=0; t<NUM_THREADS; t++){
        rc = pthread_join(thread[t], &status);
        if (rc) {
            printf("ERROR; return code from pthread_join() is %d\n", rc);
            exit(-1);
        }
        printf("Main: completed join with thread %ld having a status of %ld\n",
               t,(long)status);
    }
    end = time(NULL);
    dif = difftime(end,start);
    printf("Runtime for parallel code: %f\n", dif);

    printf("Main: program completed. Exiting.\n");
    pthread_exit(NULL);
}
### Java ###
import java.text.DecimalFormat;
/**
 * Micro-benchmark that times a trivial counting loop using long and double
 * arithmetic. Each variant is run first one thread at a time (start
 * immediately followed by join) and then with all threads started before any
 * is joined, and the elapsed wall-clock time is printed in milliseconds.
 */
public class MapTest{

    /** Worker that adds 1 to a long accumulator {@code loops} times. */
    public static class IntTest implements Runnable{
        long loops;
        long result;

        public IntTest(long loops){
            this.loops = loops;
        }

        // stupid work
        public void run(){
            result = 0;
            for (long i = 0L; i < loops; i++){
                result = result + 1;
            }
            System.out.println(result);
        }
    }

    /** Worker that adds 1.0 to a double accumulator {@code loops} times. */
    public static class DoubleTest implements Runnable{
        long loops;
        double result;

        public DoubleTest(long loops){
            this.loops = loops;
        }

        // stupid work
        public void run(){
            result = 0.0;
            for (long i = 0L; i < loops; i++){
                result = result + 1.0;
            }
            System.out.println(result);
        }
    }

    /**
     * Builds {@code tcount} unstarted threads, each wrapping a fresh worker.
     *
     * @param tcount    number of threads to create
     * @param loops     iteration count handed to every worker
     * @param useDouble true for {@link DoubleTest} workers, false for {@link IntTest}
     * @return the array of unstarted threads
     */
    private static Thread[] newWorkers(int tcount, long loops, boolean useDouble){
        Thread[] workers = new Thread[tcount];
        for (int i = 0; i < tcount; i++){
            Runnable job;
            if (useDouble){
                job = new DoubleTest(loops);
            } else {
                job = new IntTest(loops);
            }
            workers[i] = new Thread(job);
        }
        return workers;
    }

    /**
     * Runs the given threads, measures the elapsed time and prints the report.
     * Thread construction happens before this method is called, so it is not
     * part of the measured interval.
     *
     * @param workers         unstarted threads to run
     * @param concurrent      true to start all threads before joining any;
     *                        false to start and join them one after another
     * @param typeLabel       prefix for the "OVERALL Time" line
     * @param finishedMessage line printed once every thread has finished
     * @throws InterruptedException if the calling thread is interrupted while joining
     */
    private static void runAndReport(Thread[] workers, boolean concurrent,
                                     String typeLabel, String finishedMessage)
            throws InterruptedException{
        long startTime = System.nanoTime();
        if (concurrent){
            for (int i = 0; i < workers.length; i++)
                workers[i].start();
            for (int i = 0; i < workers.length; i++)
                workers[i].join();
        } else {
            for (int i = 0; i < workers.length; i++){
                workers[i].start();
                workers[i].join();
            }
        }
        long runTime = System.nanoTime() - startTime;

        System.out.println();
        System.out.println(finishedMessage);
        System.out.println(typeLabel + " OVERALL Time: "
            + new DecimalFormat("0.0000").format((double)runTime/1000000) + " ms");
        System.out.println();
    }

    public static void main(String[] args){
        long loops = 10000000000L;
        // number of threads
        int tcount = 8;
        try{
            System.out.println("Number of Runs: "+tcount);
            // IntTest, one thread at a time
            runAndReport(newWorkers(tcount, loops, false), false,
                "Int", "Int ALL RUNS FINISHED for JOB.");

            System.out.println("Number of Runs: "+tcount);
            // DoubleTest, one thread at a time
            runAndReport(newWorkers(tcount, loops, true), false,
                "Double", "Double ALL RUNS FINISHED for JOB.");

            System.out.println("Number of Threads: "+tcount);
            // IntTest, all threads at once
            runAndReport(newWorkers(tcount, loops, false), true,
                "Int", "Int ALL THREADS FINISHED for JOB.");

            // DoubleTest, all threads at once
            runAndReport(newWorkers(tcount, loops, true), true,
                "Double", "Double ALL THREADS FINISHED for JOB. ");
        }
        catch(InterruptedException e){
            // Report the interruption instead of hiding it, and keep the
            // interrupt status set for whoever inspects this thread next.
            Thread.currentThread().interrupt();
            System.err.println("Benchmark interrupted: " + e);
        }
    }
}
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to [email protected]
Note that posts from new members are moderated - please be patient with your
first post.
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---