#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cpuid.h>
#include <x86intrin.h>
#include <cpu-features.h>

/* Routines under test.  They are defined outside this file; main calls
   each of them LOOP times between two __rdtsc () readings.  */
extern void xsave (void);
extern void xsavec (void);
extern void fxsave (void);
extern void move (void);

/* Number of back-to-back calls made per timed measurement.  */
#define LOOP 3000

/* 4096-byte, 64-byte-aligned buffer.  main fills it with 0xff bytes
   before the fxsave, xsave and xsavec measurements and, after the
   xsavec run, checks that the bytes at and beyond the computed
   compacted size are still 0xff.
   NOTE(review): it has external linkage, so presumably the routines
   above store into it -- confirm against their definitions.  */
char xsave_area[4096] __attribute__ ((aligned (64)));

/* Filled in by init_cpu_features () at the start of main.  */
static struct cpu_features cpu_features;
/* EBX of CPUID leaf 0xd, sub-leaf 0, as read by main.  */
static uintptr_t xsave_state_size;
/* Compacted save-area size that main derives from the per-component
   sizes reported by CPUID leaf 0xd.  */
static uintptr_t xsave_state_comp_size;

/* Bit mask with bits 1, 2, 3, 5, 6 and 7 set.  main queries CPUID leaf
   0xd only for the sub-leaves whose bit is set here and treats every
   other component as having size 0.
   NOTE(review): presumably this must match the component mask used by
   the external xsave/xsavec routines -- confirm.  */
#define XSAVE_AREA_MASK \
  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))

/* Benchmark driver.

   Times LOOP back-to-back calls of move, fxsave, xsave and xsavec with
   the time-stamp counter and prints the elapsed count for each.  xsave
   is only measured when the OSXSAVE feature bit is set; xsavec only
   when, in addition, CPUID leaf 0xd is available and its sub-leaf 1
   reports bit 1 of EAX.

   For the xsavec case the size of the compacted save area is computed
   from CPUID leaf 0xd for the components selected by XSAVE_AREA_MASK.
   After the timed run every byte of xsave_area at or beyond that size
   must still hold the 0xff fill pattern; otherwise abort () is called.

   Returns 0.  */
int
main (void)
{
  int i;
  unsigned long long start, end;
  unsigned long long diff;

  init_cpu_features (&cpu_features);

  start = __rdtsc ();
  for (i = 0; i < LOOP; i++)
    move ();
  end = __rdtsc ();
  diff = end - start;

  /* diff is unsigned, so it is printed with %llu.  */
  printf ("move    : %llu\n", diff);

  memset (xsave_area, -1, sizeof xsave_area);

  start = __rdtsc ();
  for (i = 0; i < LOOP; i++)
    fxsave ();
  end = __rdtsc ();
  diff = end - start;

  printf ("fxsave  : %llu\n", diff);

  if (CPU_FEATURES_CPU_P ((&cpu_features), OSXSAVE))
    {
      memset (xsave_area, -1, sizeof xsave_area);

      start = __rdtsc ();
      for (i = 0; i < LOOP; i++)
	xsave ();
      end = __rdtsc ();
      diff = end - start;

      printf ("xsave   : %llu\n", diff);

      if (cpu_features.max_cpuid >= 0xd)
	{
	  unsigned int eax, ebx, ecx, edx;

	  /* Sub-leaf 0: EBX is recorded as the standard-format size.  */
	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
	  xsave_state_size = ebx;

	  __cpuid_count (0xd, 1, eax, ebx, ecx, edx);

	  /* Sub-leaf 1, EAX bit 1 gates the xsavec measurement.  */
	  if ((eax & (1U << 1)) != 0)
	    {
	      unsigned int xstate_comp_offsets[32];
	      unsigned int xstate_comp_sizes[32];
	      unsigned int comp;
	      size_t pos;

	      /* Components 0 and 1 and the start of component 2 have
		 fixed positions; everything after that is packed.  */
	      xstate_comp_offsets[0] = 0;
	      xstate_comp_offsets[1] = 160;
	      xstate_comp_offsets[2] = 576;
	      xstate_comp_sizes[0] = 160;
	      xstate_comp_sizes[1] = 256;
	      for (comp = 2; comp < 32; comp++)
		{
		  /* Shift an unsigned 1: 1 << 31 would overflow a
		     signed int.  */
		  if ((XSAVE_AREA_MASK & (1U << comp)) != 0)
		    {
		      __cpuid_count (0xd, comp, eax, ebx, ecx, edx);
		      xstate_comp_sizes[comp] = eax;
		    }
		  else
		    {
		      /* Component not selected: it takes no space and
			 requests no alignment.  */
		      ecx = 0;
		      xstate_comp_sizes[comp] = 0;
		    }

		  if (comp > 2)
		    {
		      /* Pack directly after the previous component,
			 rounding up to 64 bytes when ECX bit 1 of this
			 component's sub-leaf is set.  */
		      xstate_comp_offsets[comp]
			= (xstate_comp_offsets[comp - 1]
			   + xstate_comp_sizes[comp - 1]);
		      if ((ecx & (1U << 1)) != 0)
			xstate_comp_offsets[comp]
			  = (xstate_comp_offsets[comp] + 63) & ~63U;
		    }
		}

	      xsave_state_comp_size
		= xstate_comp_offsets[31] + xstate_comp_sizes[31];

	      memset (xsave_area, -1, sizeof xsave_area);

	      start = __rdtsc ();
	      for (i = 0; i < LOOP; i++)
		xsavec ();
	      end = __rdtsc ();
	      diff = end - start;

	      /* Nothing at or past the computed compacted size may have
		 been overwritten.  Compare as unsigned char so the check
		 does not depend on the signedness of plain char.  */
	      for (pos = xsave_state_comp_size; pos < sizeof xsave_area;
		   pos++)
		if ((unsigned char) xsave_area[pos] != 0xff)
		  abort ();

	      printf ("xsavec  : %llu\n", diff);

	      /* The sizes are uintptr_t; convert explicitly so that the
		 argument matches the conversion specifier.  */
	      printf ("xsave_state_size: %lu\n",
		      (unsigned long) xsave_state_size);
	      printf ("xsave_state_comp_size: %lu\n",
		      (unsigned long) xsave_state_comp_size);
	    }
	}
    }

  return 0;
}
