Hey David, these two --param options made no difference to the test.
I've cut the suite down to a single test (attached), which yields the
following results:
./simple_types_constant_folding_os (gcc 41)
test description time operations/s
0 "int8_t constant add" 1.34 sec 1194.03 M
./simple_types_constant_folding_os (gcc 46)
test description time operations/s
0 "int8_t constant add" 2.84 sec 563.38 M
Both compilers fully inline the templated function and the emitted code
looks very similar. I am puzzled as to why one of these loops is
significantly slower than the other. I've attached disassembled listings
- perhaps someone could have a look please? (the body of the loop starts
at 0000000000400FD for gcc41 and at 0000000000400D90 for gcc46)
Thanks,
Oleg.
On 2011/8/1 22:48, Xinliang David Li wrote:
Try to isolate the int8_t constant folding test from the rest to see
if the slowdown can be reproduced with the isolated case. If the
problem disappears, it is likely due to the following inline
parameters:
large-function-insns, large-function-growth, large-unit-insns,
inline-unit-growth. For instance, set
--param large-function-insns=10000
--param large-unit-insns=20000
David
On Mon, Aug 1, 2011 at 11:43 AM, Oleg Smolsky<oleg.smol...@riverbed.com> wrote:
On 2011/7/29 14:07, Xinliang David Li wrote:
Profiling tools are your best friend here. If you don't have access to
any, the least you can do is to build the program with -pg option and
use gprof tool to find out differences.
The test suite has a bunch of very basic C++ tests that are executed an
enormous number of times. I've built one with the obvious performance
degradation and attached the source, output and reports.
Here are some highlights:
v4.1: Total absolute time for int8_t constant folding: 30.42 sec
v4.6: Total absolute time for int8_t constant folding: 43.32 sec
Every one of the tests in this section had degraded... the first half more
than the second. I am not sure how much further I can take this - the
benchmarked code is very short and plain. I can post disassembly for one
(some?) of them if anyone is willing to take a look...
Thanks,
Oleg.
/*
Copyright 2007-2008 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html )
Source file for tests shared among several benchmarks
*/
/******************************************************************************/
// tolerance_equal<T>(a, b): "close enough" comparison used by the result checks.
//
// Primary template, used for every type without a specialization below.
// The difference is first formed in T (so unsigned types wrap exactly as they
// would in the summation being checked) and its magnitude is then taken in
// floating point. Calling unqualified abs() here could bind to the C library's
// int-only abs(int) and silently truncate a fractional difference to zero.
template<typename T>
inline bool tolerance_equal(T &a, T &b) {
    const long double diff = static_cast<long double>(static_cast<T>(a - b));
    return (std::fabs(diff) < 1.0e-6);
}

// 32- and 64-bit integers: exact comparison.
template<>
inline bool tolerance_equal(int32_t &a, int32_t &b) {
    return (a == b);
}

template<>
inline bool tolerance_equal(uint32_t &a, uint32_t &b) {
    return (a == b);
}

template<>
inline bool tolerance_equal(uint64_t &a, uint64_t &b) {
    return (a == b);
}

template<>
inline bool tolerance_equal(int64_t &a, int64_t &b) {
    return (a == b);
}

// double: relative difference (scaled by a) when |a| > 1e-8, absolute
// difference otherwise; tolerance 1e-6.
template<>
inline bool tolerance_equal(double &a, double &b) {
    double diff = a - b;
    double reldiff = diff;
    if (std::fabs(a) > 1.0e-8)
        reldiff = diff / a;
    return (std::fabs(reldiff) < 1.0e-6);
}

// float: relative difference (scaled by a) when |a| > 1e-4, absolute
// difference otherwise; tolerance 1e-3.
template<>
inline bool tolerance_equal(float &a, float &b) {
    float diff = a - b;
    double reldiff = diff;
    if (std::fabs(a) > 1.0e-4)
        reldiff = diff / a;
    // single precision divide test is really imprecise
    return (std::fabs(reldiff) < 1.0e-3);
}
/******************************************************************************/
// Validates the sum produced by a constant-operand test.
// The expected value is SIZE * Shifter::do_shift(init_value), evaluated in T
// (assumes the caller summed SIZE elements that all equal init_value).
// Prints a failure line for current_test when the values do not match.
template <typename T, typename Shifter>
inline void check_shifted_sum(T result) {
    T expected = static_cast<T>(SIZE) * Shifter::do_shift(static_cast<T>(init_value));
    if (tolerance_equal<T>(result, expected))
        return;
    printf("test %i failed\n", current_test);
}
// Validates a CSE-style test whose accumulated result must be zero.
// Shifter is unused here; it is kept so the signature parallels check_shifted_sum.
template <typename T, typename Shifter>
inline void check_shifted_sum_CSE(T result) {
    T expected = static_cast<T>(0.0);
    if (tolerance_equal<T>(result, expected))
        return;
    printf("test %i failed\n", current_test);
}
// Validates the sum produced by a one-variable test.
// Expected value: SIZE * Shifter::do_shift(init_value, var), evaluated in T.
// Prints a failure line for current_test when the values do not match.
template <typename T, typename Shifter>
inline void check_shifted_variable_sum(T result, T var) {
    T expected = static_cast<T>(SIZE) * Shifter::do_shift(static_cast<T>(init_value), var);
    if (tolerance_equal<T>(result, expected))
        return;
    printf("test %i failed\n", current_test);
}
// Validates the sum produced by a four-variable test.
// Expected value: SIZE * Shifter::do_shift(init_value, var1, var2, var3, var4),
// evaluated in T. Prints a failure line for current_test on mismatch.
template <typename T, typename Shifter>
inline void check_shifted_variable_sum(T result, T var1, T var2, T var3, T var4) {
    T expected = static_cast<T>(SIZE)
               * Shifter::do_shift(static_cast<T>(init_value), var1, var2, var3, var4);
    if (tolerance_equal<T>(result, expected))
        return;
    printf("test %i failed\n", current_test);
}
// Validates a one-variable CSE test whose accumulated result must be zero.
// Neither Shifter nor var takes part in the check.
template <typename T, typename Shifter>
inline void check_shifted_variable_sum_CSE(T result, T var) {
    T expected = static_cast<T>(0.0);
    if (tolerance_equal<T>(result, expected))
        return;
    printf("test %i failed\n", current_test);
}
// Validates a four-variable CSE test whose accumulated result must be zero.
// Neither Shifter nor var1..var4 takes part in the check.
template <typename T, typename Shifter>
inline void check_shifted_variable_sum_CSE(T result, T var1, T var2, T var3, T var4) {
    T expected = static_cast<T>(0.0);
    if (tolerance_equal<T>(result, expected))
        return;
    printf("test %i failed\n", current_test);
}
/******************************************************************************/
// Assigns `value` to every element of the half-open range [first, last).
template <typename Iterator, typename T>
void fill(Iterator first, Iterator last, T value) {
    for (; first != last; ++first) {
        *first = value;
    }
}
/******************************************************************************/
// Shifter kernel: returns input plus the single constant T(10).
template <typename T>
struct custom_constant_add {
static T do_shift(T input) { return (input + T(10)); }
};
/******************************************************************************/
// Shifter kernel: adds four separate constants (1, 2, 3, 4) to input.
// The constants are intentionally written out one by one so the compiler has to combine them.
template <typename T>
struct custom_multiple_constant_add {
static T do_shift(T input) { return (input + T(1) + T(2) + T(3) +
T(4)); }
};
/******************************************************************************/
// Shifter kernel: returns input minus the single constant T(10).
template <typename T>
struct custom_constant_sub {
static T do_shift(T input) { return (input - T(10)); }
};
/******************************************************************************/
// Shifter kernel: subtracts four separate constants (1, 2, 3, 4) from input.
// The constants are intentionally written out one by one so the compiler has to combine them.
template <typename T>
struct custom_multiple_constant_sub {
static T do_shift(T input) { return (input - T(1) - T(2) - T(3) -
T(4)); }
};
/******************************************************************************/
// Shifter kernel: returns input times the single constant T(120).
template <typename T>
struct custom_constant_multiply {
static T do_shift(T input) { return (input * T(120)); }
};
/******************************************************************************/
// Shifter kernel: multiplies input by four separate constants (2, 3, 4, 5).
// this should result in a single multiply (the constants multiply out to 120)
template <typename T>
struct custom_multiple_constant_multiply {
static T do_shift(T input) { return (input * T(2) * T(3) * T(4) *
T(5)); }
};
/******************************************************************************/
// Shifter kernel: adds the product of four constants (2 * 3 * 4 * 5) to input.
// this should result in a single add (the product involves no input and equals 120)
template <typename T>
struct custom_multiple_constant_multiply2 {
static T do_shift(T input) { return (input + T(2) * T(3) * T(4) *
T(5)); }
};
/******************************************************************************/
// Shifter kernel: returns input divided by the single constant T(5).
template <typename T>
struct custom_constant_divide {
static T do_shift(T input) { return (input / T(5)); }
};
/******************************************************************************/
// Shifter kernel: divides input successively by the constants 2, 3, 4 and 5.
// Every division depends on input, so the divisors can only be merged by
// rewriting the chain as one division by their product.
template <typename T>
struct custom_multiple_constant_divide {
static T do_shift(T input) { return ((((input / T(2) ) / T(3) ) /
T(4)) / T(5)); }
};
/******************************************************************************/
// Shifter kernel: adds a constant-only division chain (120 / 3 / 4 / 5) to input.
// this is more likely to have constants fused than the version above,
// because the division chain does not involve input at all
template <typename T>
struct custom_multiple_constant_divide2 {
static T do_shift(T input) { return (input + (((T(120) / T(3) ) /
T(4)) / T(5))); }
};
/******************************************************************************/
// Shifter kernel: mixes add, subtract, multiply and divide on constants:
// input + 2 - (3 * 4 / 5). The parenthesised part involves no input.
template <typename T>
struct custom_multiple_constant_mixed {
static T do_shift(T input) { return (input + T(2) - T(3) * T(4) /
T(5)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input bitwise-AND T(10).
template <typename T>
struct custom_constant_and {
static T do_shift(T input) { return (input & T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): ANDs input with four separate constants (15, 30, 31, 63).
template <typename T>
struct custom_multiple_constant_and {
static T do_shift(T input) { return (input & T(15) & T(30) & T(31) &
T(63)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input bitwise-OR T(10).
template <typename T>
struct custom_constant_or {
static T do_shift(T input) { return (input | T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): ORs input with four separate constants (15, 30, 31, 63).
template <typename T>
struct custom_multiple_constant_or {
static T do_shift(T input) { return (input | T(15) | T(30) | T(31) |
T(63)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input bitwise-XOR T(10).
template <typename T>
struct custom_constant_xor {
static T do_shift(T input) { return (input ^ T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): XORs input with four separate constants (15, 30, 31, 63).
template <typename T>
struct custom_multiple_constant_xor {
static T do_shift(T input) { return (input ^ T(15) ^ T(30) ^ T(31) ^
T(63)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the literal constant T(2).
// Serves as a plain-constant counterpart to the constant-expression kernels that follow.
template <typename T>
struct custom_two {
static T do_shift(T input) { return (T(2)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the constant expression T(1) + T(2).
template <typename T>
struct custom_add_constants {
static T do_shift(T input) { return (T(1) + T(2)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the constant expression T(2) - T(1).
template <typename T>
struct custom_sub_constants {
static T do_shift(T input) { return (T(2) - T(1)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the constant expression T(2) * T(3).
template <typename T>
struct custom_multiply_constants {
static T do_shift(T input) { return (T(2) * T(3)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the constant expression T(20) / T(10).
template <typename T>
struct custom_divide_constants {
static T do_shift(T input) { return (T(20) / T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): ignores input and returns the constant expression T(23) % T(10).
template <typename T>
struct custom_mod_constants {
static T do_shift(T input) { return (T(23) % T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): ignores input and returns the constant expression T(23) & T(10).
template <typename T>
struct custom_and_constants {
static T do_shift(T input) { return (T(23) & T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): ignores input and returns the constant expression T(23) | T(10).
template <typename T>
struct custom_or_constants {
static T do_shift(T input) { return (T(23) | T(10)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): ignores input and returns the constant expression T(23) ^ T(10).
template <typename T>
struct custom_xor_constants {
static T do_shift(T input) { return (T(23) ^ T(10)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the comparison T(23) == T(10)
// (false), converted from bool to T.
template <typename T>
struct custom_equal_constants {
static T do_shift(T input) { return (T(23) == T(10)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the comparison T(23) != T(10)
// (true), converted from bool to T.
template <typename T>
struct custom_notequal_constants {
static T do_shift(T input) { return (T(23) != T(10)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the comparison T(23) > T(10)
// (true), converted from bool to T.
template <typename T>
struct custom_greaterthan_constants {
static T do_shift(T input) { return (T(23) > T(10)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the comparison T(23) < T(10)
// (false), converted from bool to T.
template <typename T>
struct custom_lessthan_constants {
static T do_shift(T input) { return (T(23) < T(10)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the comparison T(23) >= T(10)
// (true), converted from bool to T.
template <typename T>
struct custom_greaterthanequal_constants {
static T do_shift(T input) { return (T(23) >= T(10)); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns the comparison T(23) <= T(10)
// (false), converted from bool to T.
template <typename T>
struct custom_lessthanequal_constants {
static T do_shift(T input) { return (T(23) <= T(10)); }
};
/******************************************************************************/
// Shifter kernel (one variable): returns input + v1.
template <typename T>
struct custom_add_variable {
static T do_shift(T input, T v1) { return (input + v1); }
};
/******************************************************************************/
// Shifter kernel (one variable): returns input - v1.
template <typename T>
struct custom_sub_variable {
static T do_shift(T input, T v1) { return (input - v1); }
};
/******************************************************************************/
// Shifter kernel (one variable): returns input * v1.
template <typename T>
struct custom_multiply_variable {
static T do_shift(T input, T v1) { return (input * v1); }
};
/******************************************************************************/
// Shifter kernel (one variable): returns input / v1.
// No zero check is performed on v1.
template <typename T>
struct custom_divide_variable {
static T do_shift(T input, T v1) { return (input / v1); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns input + v1 + v2 + v3 + v4.
template <typename T>
struct custom_add_multiple_variable {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input +
v1 + v2 + v3 + v4); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns input - v1 - v2 - v3 - v4.
template <typename T>
struct custom_sub_multiple_variable {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input -
v1 - v2 - v3 - v4); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns input * v1 * v2 * v3 * v4.
template <typename T>
struct custom_multiply_multiple_variable {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input *
v1 * v2 * v3 * v4); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns input + (v1 * v2 * v3 * v4).
// something more likely to be moved out of loops, and a sanity check:
// the product does not depend on input
template <typename T>
struct custom_multiply_multiple_variable2 {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input +
v1 * v2 * v3 * v4); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns (((input / v1) / v2) / v3) / v4.
// this can NOT have CSE and loop invariant motion applied in integer math
// and can only be optimized in float if inexact math is allowed
template <typename T>
struct custom_divide_multiple_variable {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return ((((input
/ v1 ) / v2 ) / v3) / v4); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns input + (((v1 / v2) / v3) / v4).
// this can have CSE and loop invariant motion applied in integer math
// this should be optimizable without inexact math (the division chain does not involve input)
template <typename T>
struct custom_divide_multiple_variable2 {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input +
(((v1 / v2 ) / v3) / v4)); }
};
/******************************************************************************/
// Shifter kernel (four variables): returns input + v1 - (v2 * v3 / v4).
template <typename T>
struct custom_mixed_multiple_variable {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input +
v1 - v2 * v3 / v4); }
};
/******************************************************************************/
// Shifter kernel (one variable, integer T only): returns input & v1.
template <typename T>
struct custom_variable_and {
static T do_shift(T input, T v1) { return (input & v1); }
};
/******************************************************************************/
// Shifter kernel (four variables, integer T only): returns input & v1 & v2 & v3 & v4.
template <typename T>
struct custom_multiple_variable_and {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input &
v1 & v2 & v3 & v4); }
};
/******************************************************************************/
// Shifter kernel (one variable, integer T only): returns input | v1.
template <typename T>
struct custom_variable_or {
static T do_shift(T input, T v1) { return (input | v1); }
};
/******************************************************************************/
// Shifter kernel (four variables, integer T only): returns input | v1 | v2 | v3 | v4.
template <typename T>
struct custom_multiple_variable_or {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input |
v1 | v2 | v3 | v4); }
};
/******************************************************************************/
// Shifter kernel (one variable, integer T only): returns input ^ v1.
template <typename T>
struct custom_variable_xor {
static T do_shift(T input, T v1) { return (input ^ v1); }
};
/******************************************************************************/
// Shifter kernel (four variables, integer T only): returns input ^ v1 ^ v2 ^ v3 ^ v4.
template <typename T>
struct custom_multiple_variable_xor {
static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input ^
v1 ^ v2 ^ v3 ^ v4); }
};
/******************************************************************************/
// Shifter kernel: returns input unchanged (baseline for the algebraic-identity kernels below).
template <typename T>
struct custom_identity {
static T do_shift(T input) { return (input); }
};
/******************************************************************************/
// Shifter kernel: returns input + T(0); algebraically the same as input.
template <typename T>
struct custom_add_zero {
static T do_shift(T input) { return (input + T(0)); }
};
/******************************************************************************/
// Shifter kernel: returns input - T(0); algebraically the same as input.
template <typename T>
struct custom_sub_zero {
static T do_shift(T input) { return (input - T(0)); }
};
/******************************************************************************/
// Shifter kernel: returns the arithmetic negation of input.
template <typename T>
struct custom_negate {
static T do_shift(T input) { return (-input); }
};
/******************************************************************************/
// Shifter kernel: negates input twice; algebraically the same as input.
template <typename T>
struct custom_negate_twice {
static T do_shift(T input) { return (-(-input)); }
};
/******************************************************************************/
// Shifter kernel: returns T(0) - input; algebraically the same as -input.
template <typename T>
struct custom_zero_minus {
static T do_shift(T input) { return (T(0) - input); }
};
/******************************************************************************/
// Shifter kernel: returns input * T(1); algebraically the same as input.
template <typename T>
struct custom_times_one {
static T do_shift(T input) { return (input * T(1)); }
};
/******************************************************************************/
// Shifter kernel: returns input / T(1); algebraically the same as input.
template <typename T>
struct custom_divideby_one {
static T do_shift(T input) { return (input / T(1)); }
};
/******************************************************************************/
// Shifter kernel: chains several identity operations (+0, -0, /1, 0-x, unary minus, *1).
// Taken together they reduce algebraically to input.
template <typename T>
struct custom_algebra_mixed {
static T do_shift(T input) { return (-(T(0) - (((input + T(0)) -
T(0)) / T(1)))) * T(1); }
};
/******************************************************************************/
// Shifter kernel: ignores input and returns T(0) (baseline for the zero-result kernels below).
template <typename T>
struct custom_zero {
static T do_shift(T input) { return T(0); }
};
/******************************************************************************/
// Shifter kernel: returns input * T(0).
template <typename T>
struct custom_times_zero {
static T do_shift(T input) { return (input * T(0)); }
};
/******************************************************************************/
// Shifter kernel: returns input - input.
template <typename T>
struct custom_subtract_self {
static T do_shift(T input) { return (input - input); }
};
/******************************************************************************/
// Shifter kernel: subtracts from input an identity-operation chain that itself
// reduces algebraically to input, so the whole expression reduces to zero.
template <typename T>
struct custom_algebra_mixed_constant {
static T do_shift(T input) { return (input - (-(T(0) - (((input +
T(0)) / T(1)) - T(0)))) * T(1)); }
};
/******************************************************************************/
// Three-operand kernel used by test_CSE / test_CSE_opt: returns v1 * (v2 - v3).
template <typename T>
struct custom_cse1 {
static T do_shift(T v1, T v2, T v3) { return (v1 * (v2 - v3) ); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input & input.
template <typename T>
struct custom_and_self {
static T do_shift(T input) { return (input & input); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input | input.
template <typename T>
struct custom_or_self {
static T do_shift(T input) { return (input | input); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input ^ input.
template <typename T>
struct custom_xor_self {
static T do_shift(T input) { return (input ^ input); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input | T(0).
template <typename T>
struct custom_or_zero {
static T do_shift(T input) { return (input | T(0)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input ^ T(0).
template <typename T>
struct custom_xor_zero {
static T do_shift(T input) { return (input ^ T(0)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input AND the bitwise complement of T(0).
template <typename T>
struct custom_andnot_zero {
static T do_shift(T input) { return (input & ~ T(0)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input & T(0).
template <typename T>
struct custom_and_zero {
static T do_shift(T input) { return (input & T(0)); }
};
/******************************************************************************/
// Shifter kernel (integer T only): returns input % T(1).
template <typename T>
struct custom_mod_one {
static T do_shift(T input) { return (input % T(1)); }
};
/******************************************************************************/
// Shifter kernel: returns the comparison input == input, converted from bool to T.
template <typename T>
struct custom_equal_self {
static T do_shift(T input) { return (input == input); }
};
/******************************************************************************/
// Shifter kernel: returns the comparison input != input, converted from bool to T.
template <typename T>
struct custom_notequal_self {
static T do_shift(T input) { return (input != input); }
};
/******************************************************************************/
// Shifter kernel: returns the comparison input > input, converted from bool to T.
template <typename T>
struct custom_greaterthan_self {
static T do_shift(T input) { return (input > input); }
};
/******************************************************************************/
// Shifter kernel: returns the comparison input < input, converted from bool to T.
template <typename T>
struct custom_lessthan_self {
static T do_shift(T input) { return (input < input); }
};
/******************************************************************************/
// Shifter kernel: returns the comparison input >= input, converted from bool to T.
template <typename T>
struct custom_greaterthanequal_self {
static T do_shift(T input) { return (input >= input); }
};
/******************************************************************************/
// Shifter kernel: returns the comparison input <= input, converted from bool to T.
template <typename T>
struct custom_lessthanequal_self {
static T do_shift(T input) { return (input <= input); }
};
/******************************************************************************/
// Times a one-argument Shifter kernel over the array `first`.
//   first - array with at least `count` readable elements
//   count - number of elements summed per pass
//   label - handed to record_result() along with the value of timer()
// Runs `iterations` passes (a global defined outside this function). Each pass
// sums Shifter::do_shift(first[n]) into a T and validates the sum.
// NOTE(review): check_shifted_sum compares against SIZE * do_shift(init_value),
// so callers presumably pass count == SIZE and an array filled with init_value - confirm.
template <typename T, typename Shifter>
void test_constant(T* first, int count, const char *label) {
int i;
start_timer();
for(i = 0; i < iterations; ++i) {
T result = 0;
for (int n = 0; n < count; ++n) {
result += Shifter::do_shift( first[n] );
}
// the sum is consumed on every pass
check_shifted_sum<T, Shifter>(result);
}
// timer() is defined elsewhere; presumably the time elapsed since start_timer() - confirm
record_result( timer(), label );
}
/******************************************************************************/
// Times a two-argument Shifter kernel (element, v1) over the array `first`.
//   first - array with at least `count` readable elements
//   count - number of elements summed per pass
//   v1    - operand forwarded unchanged to every do_shift call
//   label - handed to record_result() along with the value of timer()
// Runs `iterations` passes (a global defined outside this function). Each pass
// sums Shifter::do_shift(first[n], v1) into a T and validates the sum.
template <typename T, typename Shifter>
void test_variable1(T* first, int count, T v1, const char *label) {
int i;
start_timer();
for(i = 0; i < iterations; ++i) {
T result = 0;
for (int n = 0; n < count; ++n) {
result += Shifter::do_shift( first[n], v1 );
}
// the sum is consumed on every pass
check_shifted_variable_sum<T, Shifter>(result, v1);
}
// timer() is defined elsewhere; presumably the time elapsed since start_timer() - confirm
record_result( timer(), label );
}
/******************************************************************************/
// Times a five-argument Shifter kernel (element, v1..v4) over the array `first`.
//   first  - array with at least `count` readable elements
//   count  - number of elements summed per pass
//   v1..v4 - operands forwarded unchanged to every do_shift call
//   label  - handed to record_result() along with the value of timer()
// Runs `iterations` passes (a global defined outside this function). Each pass
// sums Shifter::do_shift(first[n], v1, v2, v3, v4) into a T and validates the sum.
template <typename T, typename Shifter>
void test_variable4(T* first, int count, T v1, T v2, T v3, T v4, const char
*label) {
int i;
start_timer();
for(i = 0; i < iterations; ++i) {
T result = 0;
for (int n = 0; n < count; ++n) {
result += Shifter::do_shift( first[n], v1, v2, v3, v4 );
}
// the sum is consumed on every pass
check_shifted_variable_sum<T, Shifter>(result, v1, v2, v3, v4);
}
// timer() is defined elsewhere; presumably the time elapsed since start_timer() - confirm
record_result( timer(), label );
}
/******************************************************************************/
// Hand-optimized variant of test_CSE: the repeated Shifter::do_shift value is
// computed once per element pair, doubled, and reused in both updates.
//   first - array of elements; first[0] and first[1] are read unconditionally,
//           so it must hold at least two elements
//   count - number of elements; the loop visits the pairs (n-1, n) for n = 1 .. count-1
//   v1    - first operand passed to every do_shift call
//   label - handed to record_result() along with the value of timer()
// Each update adds first[n-1] + temp and subtracts first[n] + temp, and the
// result is validated against zero by check_shifted_variable_sum_CSE.
template <typename T, typename Shifter>
void test_CSE_opt(T* first, int count, T v1, const char *label) {
int i;
start_timer();
for(i = 0; i < iterations; ++i) {
T result = 0;
// pair (0, 1) is handled here and again by the first loop iteration (n = 1)
T temp = Shifter::do_shift( v1, first[0], first[1] );
temp += temp;
result += first[0] + temp;
result -= first[1] + temp;
for (int n = 1; n < count; ++n) {
temp = Shifter::do_shift( v1, first[n-1], first[n] );
temp += temp;
result += first[n-1] + temp;
result -= first[n] + temp;
}
check_shifted_variable_sum_CSE<T, Shifter>(result, v1);
}
// timer() is defined elsewhere; presumably the time elapsed since start_timer() - confirm
record_result( timer(), label );
}
/******************************************************************************/
// Common-subexpression test: every update spells out the same
// Shifter::do_shift(v1, a, b) call twice, and the += / -= pair repeats it again,
// leaving it to the compiler to notice the four identical calls.
//   first - array of elements; first[0] and first[1] are read unconditionally,
//           so it must hold at least two elements
//   count - number of elements; the loop visits the pairs (n-1, n) for n = 1 .. count-1
//   v1    - first operand passed to every do_shift call
//   label - handed to record_result() along with the value of timer()
// The result is validated against zero by check_shifted_variable_sum_CSE.
template <typename T, typename Shifter>
void test_CSE(T* first, int count, T v1, const char *label) {
int i;
start_timer();
for(i = 0; i < iterations; ++i) {
T result = 0;
// pair (0, 1) is handled here and again by the first loop iteration (n = 1)
result += first[0] + Shifter::do_shift( v1, first[0], first[1] ) +
Shifter::do_shift( v1, first[0], first[1] );
result -= first[1] + Shifter::do_shift( v1, first[0], first[1] ) +
Shifter::do_shift( v1, first[0], first[1] );
for (int n = 1; n < count; ++n) {
result += first[n-1] + Shifter::do_shift( v1, first[n-1],
first[n] ) + Shifter::do_shift( v1, first[n-1], first[n] );
result -= first[n] + Shifter::do_shift( v1, first[n-1],
first[n] ) + Shifter::do_shift( v1, first[n-1], first[n] );
}
check_shifted_variable_sum_CSE<T, Shifter>(result, v1);
}
// timer() is defined elsewhere; presumably the time elapsed since start_timer() - confirm
record_result( timer(), label );
}
/******************************************************************************/
/*
Copyright 2007-2008 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html )
Goal: Test compiler optimizations related to constant folding of simple
language defined types
Assumptions:
    1) the compiler will combine constant calculations into a single constant for simple types
       aka constant folding
        result = A + B                      ==>  result = constant
        result = A - B                      ==>  result = constant
        result = A * B                      ==>  result = constant
        result = A / B                      ==>  result = constant
        result = A % B                      ==>  result = constant                   for integer types
        result = (A == B)                   ==>  result = constant                   for integer types
        result = (A != B)                   ==>  result = constant                   for integer types
        result = (A > B)                    ==>  result = constant                   for integer types
        result = (A < B)                    ==>  result = constant                   for integer types
        result = (A >= B)                   ==>  result = constant                   for integer types
        result = (A <= B)                   ==>  result = constant                   for integer types
        result = (A & B)                    ==>  result = constant                   for integer types
        result = (A | B)                    ==>  result = constant                   for integer types
        result = (A ^ B)                    ==>  result = constant                   for integer types
        result = input + A + B + C + D      ==>  result = input + (A+B+C+D)
        result = input - A - B - C - D      ==>  result = input - (A+B+C+D)
        result = input * A * B * C * D      ==>  result = input * (A*B*C*D)
        result = input + A * B * C * D      ==>  result = input + (A*B*C*D)
        result = ((((input/A) /B) /C) /D)   ==>  result = input / (A*B*C*D)
        result = input + (((A /B) /C) /D)   ==>  result = input + (A/B/C/D)
        result = input & A & B & C & D      ==>  result = input & (A&B&C&D)          for integer types
        result = input | A | B | C | D      ==>  result = input | (A|B|C|D)          for integer types
        result = input ^ A ^ B ^ C ^ D      ==>  result = input ^ (A^B^C^D)          for integer types
NOTE - in some cases, loop invariant code motion might move the constant
       calculation out of the inner loop, making it appear that the constants
       were folded.
       But in the constant result cases, we want the compiler to recognize the
       constant and move it out of the loop.
*/
/******************************************************************************/
#include "benchmark_stdint.hpp"
#include <cstddef>
#include <cstdio>
#include <ctime>
#include <cstdlib>
#include <cmath>
#include "benchmark_results.h"
#include "benchmark_timer.h"
/******************************************************************************/
// this constant may need to be adjusted to give reasonable minimum times
// For best results, times should be about 1.0 seconds for the minimum test run
// (main() overwrites it from argv[1] when that argument is given)
int base_iterations = 2000000;
// iteration count read by the test loops; main() rescales it from base_iterations
int iterations = base_iterations;
// 8000 items, or between 8k and 64k of data
// this is intended to remain within the L2 cache of most common CPUs
const int SIZE = 8000;
// initial value for filling our arrays, may be changed from the command line
double init_value = 1.0;
/******************************************************************************/
// our global arrays of numbers to be operated upon
// (one array per element type; the main() in this file only touches data8)
double dataDouble[SIZE];
float dataFloat[SIZE];
uint64_t data64unsigned[SIZE];
int64_t data64[SIZE];
uint32_t data32unsigned[SIZE];
int32_t data32[SIZE];
uint16_t data16unsigned[SIZE];
int16_t data16[SIZE];
uint8_t data8unsigned[SIZE];
int8_t data8[SIZE];
/******************************************************************************/
#include "benchmark_shared_tests.h"
/******************************************************************************/
int main(int argc, char** argv) {
// output command for documentation:
int i;
for (i = 0; i < argc; ++i)
printf("%s ", argv[i] );
printf("\n");
if (argc > 1) base_iterations = atoi(argv[1]);
if (argc > 2) init_value = (double) atof(argv[2]);
// int8_t
::fill(data8, data8+SIZE, int8_t(init_value));
iterations = base_iterations / 10;
test_constant<int8_t, custom_constant_add<int8_t> >(data8,SIZE,"int8_t
constant add");
summarize("int8_t constant folding", SIZE, iterations, kDontShowGMeans,
kDontShowPenalty );
return 0;
}
// the end
/******************************************************************************/
/******************************************************************************/
.text:0000000000400EFF ;
---------------------------------------------------------------------------
.text:0000000000400F04 align 10h
.text:0000000000400F10
.text:0000000000400F10 ; =============== S U B R O U T I N E
=======================================
.text:0000000000400F10
.text:0000000000400F10 ; Attributes: bp-based frame
.text:0000000000400F10
.text:0000000000400F10 public main ; main() of the single-test benchmark; the accompanying message places the gcc41 loop body at 400FD..., which matches the loop at 400FD7 below
.text:0000000000400F10 main proc near
.text:0000000000400F10 push rbp
.text:0000000000400F11 mov rbp, rsp
.text:0000000000400F14 push r14
.text:0000000000400F16 push r13
.text:0000000000400F18 push r12
.text:0000000000400F1A push rbx
.text:0000000000400F1B call _mcount ; profiling hook (presumably from a -pg build - confirm)
.text:0000000000400F20 test edi, edi ; argc <= 0 ?
.text:0000000000400F22 mov r13d, edi ; r13d = argc
.text:0000000000400F25 mov r14, rsi ; r14 = argv
.text:0000000000400F28 jle short loc_400F4C ; nothing to echo
.text:0000000000400F2A mov rbx, rsi ; rbx walks argv
.text:0000000000400F2D xor r12d, r12d ; i = 0
.text:0000000000400F30
.text:0000000000400F30 loc_400F30: ; CODE XREF:
main+3Aj
.text:0000000000400F30 mov rsi, [rbx] ; argv[i]
.text:0000000000400F33 xor eax, eax
.text:0000000000400F35 mov edi, offset aS ; "%s "
.text:0000000000400F3A add r12d, 1 ; ++i
.text:0000000000400F3E add rbx, 8 ; advance to the next argv slot
.text:0000000000400F42 call _printf ; printf("%s ", argv[i])
.text:0000000000400F47 cmp r12d, r13d
.text:0000000000400F4A jnz short loc_400F30 ; loop until i == argc
.text:0000000000400F4C
.text:0000000000400F4C loc_400F4C: ; CODE XREF:
main+18j
.text:0000000000400F4C mov edi, 0Ah ; c
.text:0000000000400F51 call _putchar ; the source's printf("\n"), emitted as putchar('\n')
.text:0000000000400F56 cmp r13d, 1 ; argc > 1 ?
.text:0000000000400F5A jle short loc_400F74
.text:0000000000400F5C mov rdi, [r14+8] ; nptr
.text:0000000000400F60 xor ecx, ecx ; group
.text:0000000000400F62 mov edx, 0Ah ; base
.text:0000000000400F67 xor esi, esi ; endptr
.text:0000000000400F69 call ___strtol_internal ; the source's atoi(argv[1])
.text:0000000000400F6E mov cs:base_iterations, eax
.text:0000000000400F74
.text:0000000000400F74 loc_400F74: ; CODE XREF:
main+4Aj
.text:0000000000400F74 cmp r13d, 2 ; argc > 2 ?
.text:0000000000400F78 jg loc_40110B ; yes: parse argv[2] into init_value, then come back
.text:0000000000400F7E
.text:0000000000400F7E loc_400F7E: ; CODE XREF:
main+210j
.text:0000000000400F7E movsd xmm0, cs:init_value
.text:0000000000400F86 xor eax, eax ; fill index = 0
.text:0000000000400F88 cvttsd2si edx, xmm0 ; truncate init_value to an integer; only dl is stored below
.text:0000000000400F8C db 66h, 66h, 66h
.text:0000000000400F8C nop ; padding: the loop head below sits on a 16-byte boundary
.text:0000000000400F90
.text:0000000000400F90 loc_400F90: ; CODE XREF:
main+90j
.text:0000000000400F90 mov ds:data8[rax], dl ; fill data8 one byte per iteration
.text:0000000000400F96 add rax, 1
.text:0000000000400F9A cmp rax, 1F40h ; 1F40h = 8000 = SIZE
.text:0000000000400FA0 jnz short loc_400F90
.text:0000000000400FA2 mov ecx, cs:base_iterations
.text:0000000000400FA8 mov edx, 66666667h ; reciprocal constant for signed division by 10
.text:0000000000400FAD mov eax, ecx
.text:0000000000400FAF sar ecx, 1Fh ; ecx = sign mask of base_iterations
.text:0000000000400FB2 imul edx ; edx:eax = base_iterations * 66666667h
.text:0000000000400FB4 sar edx, 2
.text:0000000000400FB7 sub edx, ecx ; edx = base_iterations / 10
.text:0000000000400FB9 mov cs:iterations, edx
.text:0000000000400FBF call _clock ; timing starts: clock() result goes to start_time
.text:0000000000400FC4 mov cs:start_time, rax
.text:0000000000400FCB mov eax, cs:iterations
.text:0000000000400FD1 test eax, eax
.text:0000000000400FD3 jle short loc_40103B ; iterations <= 0: skip the timed loop
.text:0000000000400FD5 xor ebx, ebx ; outer repetition counter = 0
.text:0000000000400FD7
.text:0000000000400FD7 loc_400FD7: ; CODE XREF:
main+129j
.text:0000000000400FD7 xor ecx, ecx ; accumulator = 0
.text:0000000000400FD9 xor edx, edx ; element index = 0
.text:0000000000400FDB db 66h, 66h
.text:0000000000400FDB nop
.text:0000000000400FDE db 66h
.text:0000000000400FDE nop
.text:0000000000400FE0
.text:0000000000400FE0 loc_400FE0: ; CODE XREF:
main+E8j
.text:0000000000400FE0 movzx eax, ds:data8[rdx] ; load one element, zero-extended into a fresh eax
.text:0000000000400FE7 add rdx, 1
.text:0000000000400FEB add eax, 0Ah ; element + 10
.text:0000000000400FEE cmp rdx, 1F40h ; 8000 elements
.text:0000000000400FF5 lea ecx, [rax+rcx] ; accumulator += element + 10; this lea is the only op that both reads and writes the accumulator
.text:0000000000400FF8 jnz short loc_400FE0
.text:0000000000400FFA movsd xmm0, cs:init_value
.text:0000000000401002 movsd xmm1, cs:qword_401260 ; constant the check below compares against (presumably a tolerance - confirm)
.text:000000000040100A cvttsd2si eax, xmm0
.text:000000000040100E add eax, 0Ah
.text:0000000000401011 shl eax, 6 ; (init + 10) * 64; 64 == 8000 mod 256, so presumably SIZE * (init + 10) reduced to 8 bits - confirm against benchmark_shared_tests.h
.text:0000000000401014 sub cl, al ; 8-bit difference between accumulator and that value
.text:0000000000401016 movsx eax, cl
.text:0000000000401019 mov edx, eax
.text:000000000040101B sar edx, 1Fh
.text:000000000040101E xor eax, edx
.text:0000000000401020 sub eax, edx ; eax = absolute value of the difference
.text:0000000000401022 cvtsi2sd xmm0, eax
.text:0000000000401026 ucomisd xmm1, xmm0
.text:000000000040102A jbe loc_4010F4 ; report a failure unless |difference| is below the constant in xmm1
.text:0000000000401030
.text:0000000000401030 loc_401030: ; CODE XREF:
main+1F6j
.text:0000000000401030 add ebx, 1
.text:0000000000401033 cmp cs:iterations, ebx ; iterations is re-read from memory on every repetition
.text:0000000000401039 jg short loc_400FD7
.text:000000000040103B
.text:000000000040103B loc_40103B: ; CODE XREF:
main+C3j
.text:000000000040103B call _clock ; timing ends: clock() result goes to end_time
.text:0000000000401040 mov rdi, cs:results ; ptr
.text:0000000000401047 mov rbx, rax
.text:000000000040104A mov cs:end_time, rax
.text:0000000000401051 mov r12, cs:start_time
.text:0000000000401058 test rdi, rdi
.text:000000000040105B jz short loc_40106B ; results == NULL: allocate
.text:000000000040105D mov edx, cs:current_test
.text:0000000000401063 cmp edx, cs:allocated_results
.text:0000000000401069 jl short loc_40109C ; current_test < allocated_results: no growth needed
.text:000000000040106B
.text:000000000040106B loc_40106B: ; CODE XREF:
main+14Bj
.text:000000000040106B mov esi, cs:allocated_results
.text:0000000000401071 add esi, 0Ah ; grow the capacity by 10 entries
.text:0000000000401074 mov cs:allocated_results, esi
.text:000000000040107A movsxd rsi, esi
.text:000000000040107D shl rsi, 4 ; size
.text:0000000000401081 call _realloc ; realloc(results, allocated_results * 16)
.text:0000000000401086 test rax, rax
.text:0000000000401089 mov cs:results, rax
.text:0000000000401090 jz loc_401125 ; realloc returned NULL
.text:0000000000401096 mov edx, cs:current_test
.text:000000000040109C
.text:000000000040109C loc_40109C: ; CODE XREF:
main+159j
.text:000000000040109C sub rbx, r12 ; end_time - start_time
.text:000000000040109F movsxd rax, edx
.text:00000000004010A2 xor r8d, r8d
.text:00000000004010A5 cvtsi2sd xmm0, rbx
.text:00000000004010AA shl rax, 4 ; 16 bytes per results entry
.text:00000000004010AE add rax, cs:results ; rax = &results[current_test]
.text:00000000004010B5 xor ecx, ecx
.text:00000000004010B7 mov esi, 1F40h ; 8000 = SIZE, second argument of summarize
.text:00000000004010BC mov edi, offset aInt8_tConstant ;
"int8_t constant folding"
.text:00000000004010C1 mov qword ptr [rax+8], 4012BAh ; second field of the entry (presumably the test label address - confirm)
.text:00000000004010C9 divsd xmm0, cs:qword_401258 ; elapsed ticks divided by a constant (presumably CLOCKS_PER_SEC - confirm)
.text:00000000004010D1 movsd qword ptr [rax], xmm0 ; first field of the entry = that quotient
.text:00000000004010D5 lea eax, [rdx+1]
.text:00000000004010D8 mov edx, cs:iterations
.text:00000000004010DE mov cs:current_test, eax ; ++current_test
.text:00000000004010E4 call _Z9summarizePKciiii ;
summarize(char const*,int,int,int,int)
.text:00000000004010E9 pop rbx
.text:00000000004010EA pop r12
.text:00000000004010EC pop r13
.text:00000000004010EE pop r14
.text:00000000004010F0 leave
.text:00000000004010F1 xor eax, eax ; return 0
.text:00000000004010F3 retn
.text:00000000004010F4 ;
---------------------------------------------------------------------------
.text:00000000004010F4
.text:00000000004010F4 loc_4010F4: ; CODE XREF:
main+11Aj
.text:00000000004010F4 mov esi, cs:current_test
.text:00000000004010FA mov edi, offset aTestIFailed ; "test
%i failed\n"
.text:00000000004010FF xor eax, eax
.text:0000000000401101 call _printf
.text:0000000000401106 jmp loc_401030 ; continue with the next repetition
.text:000000000040110B ;
---------------------------------------------------------------------------
.text:000000000040110B
.text:000000000040110B loc_40110B: ; CODE XREF:
main+68j
.text:000000000040110B mov rdi, [r14+10h] ; nptr
.text:000000000040110F xor edx, edx ; group
.text:0000000000401111 xor esi, esi ; endptr
.text:0000000000401113 call ___strtod_internal ; the source's atof(argv[2])
.text:0000000000401118 movsd cs:init_value, xmm0
.text:0000000000401120 jmp loc_400F7E
.text:0000000000401125 ;
---------------------------------------------------------------------------
.text:0000000000401125
.text:0000000000401125 loc_401125: ; CODE XREF:
main+180j
.text:0000000000401125 mov esi, cs:allocated_results
.text:000000000040112B mov edi, offset aCouldNotAlloca ;
"Could not allocate %d results\n"
.text:0000000000401130 call _printf
.text:0000000000401135 mov edi, 0FFFFFFFFh ; status
.text:000000000040113A call _exit ; exit(-1) after the allocation failure message
.text:000000000040113A main endp
.text:0000000000400C98 ;
---------------------------------------------------------------------------
.text:0000000000400C99 align 20h
.text:0000000000400CA0
.text:0000000000400CA0 ; =============== S U B R O U T I N E
=======================================
.text:0000000000400CA0
.text:0000000000400CA0
.text:0000000000400CA0 public main ; main() of the single-test benchmark; the accompanying message places the gcc46 loop body at 400D90
.text:0000000000400CA0 main proc near
.text:0000000000400CA0
.text:0000000000400CA0 var_28 = qword ptr -28h
.text:0000000000400CA0
.text:0000000000400CA0 push r12
.text:0000000000400CA2 push rbp
.text:0000000000400CA3 mov rbp, rsi ; rbp = argv
.text:0000000000400CA6 push rbx
.text:0000000000400CA7 mov ebx, edi ; ebx = argc
.text:0000000000400CA9 sub rsp, 10h
.text:0000000000400CAD test edi, edi ; argc <= 0 ?
.text:0000000000400CAF jle loc_400E3B ; nothing to echo: only print the newline
.text:0000000000400CB5 xor r12d, r12d ; i = 0
.text:0000000000400CB8 db 66h, 66h, 66h
.text:0000000000400CB8 nop
.text:0000000000400CBC db 66h, 66h, 66h
.text:0000000000400CBC nop ; padding: the loop head below sits on a 16-byte boundary
.text:0000000000400CC0
.text:0000000000400CC0 loc_400CC0: ; CODE XREF:
main+38j
.text:0000000000400CC0 mov rsi, [rbp+r12*8+0] ; argv[i]
.text:0000000000400CC5 xor eax, eax
.text:0000000000400CC7 mov edi, offset aS ; "%s "
.text:0000000000400CCC add r12, 1 ; ++i
.text:0000000000400CD0 call _printf ; printf("%s ", argv[i])
.text:0000000000400CD5 cmp ebx, r12d
.text:0000000000400CD8 jg short loc_400CC0 ; loop while argc > i
.text:0000000000400CDA mov edi, 0Ah ; c
.text:0000000000400CDF call _putchar ; the source's printf("\n"), emitted as putchar('\n')
.text:0000000000400CE4 cmp ebx, 1 ; argc > 1 ?
.text:0000000000400CE7 jle short loc_400D1B
.text:0000000000400CE9 mov rdi, [rbp+8] ; nptr
.text:0000000000400CED xor ecx, ecx ; group
.text:0000000000400CEF xor esi, esi ; endptr
.text:0000000000400CF1 mov edx, 0Ah ; base
.text:0000000000400CF6 call ___strtol_internal ; the source's atoi(argv[1])
.text:0000000000400CFB cmp ebx, 2
.text:0000000000400CFE mov cs:base_iterations, eax
.text:0000000000400D04 jz short loc_400D1B ; argc == 2: no init_value argument
.text:0000000000400D06 mov rdi, [rbp+10h] ; nptr
.text:0000000000400D0A xor edx, edx ; group
.text:0000000000400D0C xor esi, esi ; endptr
.text:0000000000400D0E call ___strtod_internal ; the source's atof(argv[2])
.text:0000000000400D13 movsd cs:init_value, xmm0
.text:0000000000400D1B
.text:0000000000400D1B loc_400D1B: ; CODE XREF:
main+47j
.text:0000000000400D1B ; main+64j ...
.text:0000000000400D1B movsd xmm1, cs:init_value
.text:0000000000400D23 mov ecx, 1F4h ; 1F4h = 500 stores of 16 bytes = 8000 bytes
.text:0000000000400D28 cvttsd2si eax, xmm1 ; truncate init_value to an integer
.text:0000000000400D2C pxor xmm1, xmm1 ; all-zero shuffle mask
.text:0000000000400D30 movd xmm0, eax
.text:0000000000400D34 xor eax, eax ; store index = 0
.text:0000000000400D36 pshufb xmm0, xmm1 ; zero mask selects byte 0 for every lane: broadcast the fill byte to all 16 bytes
.text:0000000000400D3B db 66h, 66h
.text:0000000000400D3B nop
.text:0000000000400D3E db 66h
.text:0000000000400D3E nop
.text:0000000000400D40
.text:0000000000400D40 loc_400D40: ; CODE XREF:
main+B6j
.text:0000000000400D40 mov rdx, rax
.text:0000000000400D43 add rax, 1
.text:0000000000400D47 shl rdx, 4 ; byte offset = index * 16
.text:0000000000400D4B cmp rcx, rax
.text:0000000000400D4E movdqa xmmword ptr [rdx+5015A0h], xmm0 ; 16-byte store starting at 5015A0h (5015A0h + 1F40h = 5034E0h, the end address the timed loop compares against)
.text:0000000000400D56 ja short loc_400D40
.text:0000000000400D58 mov ecx, cs:base_iterations
.text:0000000000400D5E mov edx, 66666667h ; reciprocal constant for signed division by 10
.text:0000000000400D63 xor ebx, ebx ; outer repetition counter = 0
.text:0000000000400D65 mov eax, ecx
.text:0000000000400D67 sar ecx, 1Fh ; ecx = sign mask of base_iterations
.text:0000000000400D6A imul edx ; edx:eax = base_iterations * 66666667h
.text:0000000000400D6C sar edx, 2
.text:0000000000400D6F sub edx, ecx ; edx = base_iterations / 10
.text:0000000000400D71 mov cs:iterations, edx
.text:0000000000400D77 call _Z11start_timerv ;
start_timer(void)
.text:0000000000400D7C mov ecx, cs:iterations ; iterations is kept in ecx for the whole timed loop
.text:0000000000400D82 movsd xmm1, cs:qword_4010E0 ; constant the check below compares against (presumably a tolerance - confirm), loaded once outside the loop
.text:0000000000400D8A test ecx, ecx
.text:0000000000400D8C jle short loc_400DE3 ; iterations <= 0: skip the timed loop
.text:0000000000400D8E db 66h
.text:0000000000400D8E nop
.text:0000000000400D90
.text:0000000000400D90 loc_400D90: ; CODE XREF:
main+141j
.text:0000000000400D90 mov edx, offset data8 ; element pointer = start of data8
.text:0000000000400D95 xor eax, eax ; accumulator = 0
.text:0000000000400D97 db 66h, 66h
.text:0000000000400D97 nop
.text:0000000000400D9A db 66h, 66h
.text:0000000000400D9A nop
.text:0000000000400D9D db 66h, 66h
.text:0000000000400D9D nop
.text:0000000000400DA0
.text:0000000000400DA0 loc_400DA0: ; CODE XREF:
main+110j
.text:0000000000400DA0 add eax, 0Ah ; accumulator += 10
.text:0000000000400DA3 add al, [rdx] ; accumulator (low byte) += element, read straight from memory. NOTE(review): both adds read and write the accumulator, so each iteration has two dependent ops on it - possibly related to the reported slowdown, unconfirmed
.text:0000000000400DA5 add rdx, 1
.text:0000000000400DA9 cmp rdx, 5034E0h ; end address: 5015A0h + 8000
.text:0000000000400DB0 jnz short loc_400DA0
.text:0000000000400DB2 movsd xmm0, cs:init_value
.text:0000000000400DBA cvttsd2si edx, xmm0
.text:0000000000400DBE add edx, 0Ah
.text:0000000000400DC1 shl edx, 6 ; (init + 10) * 64; 64 == 8000 mod 256, so presumably SIZE * (init + 10) reduced to 8 bits - confirm against benchmark_shared_tests.h
.text:0000000000400DC4 sub al, dl ; 8-bit difference between accumulator and that value
.text:0000000000400DC6 movsx eax, al
.text:0000000000400DC9 mov edx, eax
.text:0000000000400DCB sar edx, 1Fh
.text:0000000000400DCE xor eax, edx
.text:0000000000400DD0 sub eax, edx ; eax = absolute value of the difference
.text:0000000000400DD2 cvtsi2sd xmm0, eax
.text:0000000000400DD6 ucomisd xmm1, xmm0
.text:0000000000400DDA jbe short loc_400E17 ; report a failure unless |difference| is below the constant in xmm1
.text:0000000000400DDC
.text:0000000000400DDC loc_400DDC: ; CODE XREF:
main+199j
.text:0000000000400DDC add ebx, 1
.text:0000000000400DDF cmp ebx, ecx ; compared against the register copy of iterations
.text:0000000000400DE1 jl short loc_400D90
.text:0000000000400DE3
.text:0000000000400DE3 loc_400DE3: ; CODE XREF:
main+ECj
.text:0000000000400DE3 call _Z5timerv ; timer(void)
.text:0000000000400DE8 mov edi, offset aInt8_tConstant ;
"int8_t constant add"
.text:0000000000400DED call _Z13record_resultdPKc ;
record_result(double,char const*)
.text:0000000000400DF2 mov edx, cs:iterations
.text:0000000000400DF8 xor r8d, r8d
.text:0000000000400DFB xor ecx, ecx
.text:0000000000400DFD mov esi, 1F40h ; 8000 = SIZE, second argument of summarize
.text:0000000000400E02 mov edi, offset aInt8_tConsta_0 ;
"int8_t constant folding"
.text:0000000000400E07 call _Z9summarizePKciiii ;
summarize(char const*,int,int,int,int)
.text:0000000000400E0C add rsp, 10h
.text:0000000000400E10 xor eax, eax ; return 0
.text:0000000000400E12 pop rbx
.text:0000000000400E13 pop rbp
.text:0000000000400E14 pop r12
.text:0000000000400E16 retn
.text:0000000000400E17 ;
---------------------------------------------------------------------------
.text:0000000000400E17
.text:0000000000400E17 loc_400E17: ; CODE XREF:
main+13Aj
.text:0000000000400E17 mov esi, cs:current_test
.text:0000000000400E1D mov edi, offset aTestIFailed ; "test
%i failed\n"
.text:0000000000400E22 xor eax, eax
.text:0000000000400E24 movsd [rsp+28h+var_28], xmm1 ; save the comparison constant across the call
.text:0000000000400E29 call _printf
.text:0000000000400E2E mov ecx, cs:iterations ; reload iterations into ecx after the call
.text:0000000000400E34 movsd xmm1, [rsp+28h+var_28] ; restore the comparison constant
.text:0000000000400E39 jmp short loc_400DDC ; continue with the next repetition
.text:0000000000400E3B ;
---------------------------------------------------------------------------
.text:0000000000400E3B
.text:0000000000400E3B loc_400E3B: ; CODE XREF:
main+Fj
.text:0000000000400E3B mov edi, 0Ah ; c
.text:0000000000400E40 call _putchar ; argc <= 0 path: print only the newline
.text:0000000000400E45 jmp loc_400D1B
.text:0000000000400E45 main endp