#include "MemTest.h"
#include "BlackJackTimer.h"

/*  A simple set of code to find cache line sizes.
 *  While this will produce data for a TLB that can be analyzed by
 *  a human, there are easier ways to find the TLB PageSize; see, for
 *  example, the code in MemAnalysis.c, where CorDTest() is used to
 *  find the page size.  (Watch the variable COrDFlattens.)
 *
 *
 *
 */

/* Limits for the stride sweep.  Strides are counted in sizeof(int) words. */

/* First (smallest) stride timed by FindGranularity(). */
#define MIN_STRIDE 1
/* NOTE(review): not referenced anywhere in the code visible in this file. */
#define MIN_PAGE_SIZE 256
/* Cap on the largest stride.  FindGranularity() uses half the OS page     */
/* (in words) instead whenever the page is not larger than this value.     */
#define MAX_STRIDE 4096

/* Parallel result tables filled by FindGranularity(): Times[j] holds the  */
/* smallest time observed for stride Strides[j], and is later overwritten  */
/* in place with its normalized value.                                     */
int  Strides[MAX_TESTS_PER_RUN];
double Times[MAX_TESTS_PER_RUN];

/* The buffer that TimeStride() walks.  SetUpTrial() allocates and links   */
/* it; FindGranularity() frees it when the sweep is finished.              */
static int *Array = (int *) NULL;

/* Forward declaration: defined after FindGranularity(), which calls it. */
static int AnalyzeGran ( int s[], double t[], int c );

/* TimeStride -- time a pointer chase through the file-scope Array.
 *
 *   TestSize  size handed to Flush() before timing (also reported in the
 *             heartbeat message).
 *   Stride    only used in the heartbeat message; the access pattern
 *             itself was fixed when SetUpTrial() linked Array.
 *   NA        NA.outer passes are made; each pass restarts at index 0
 *             and performs NA.inner dependent loads.
 *
 * Returns elapsed(finish, start) for the timed region.
 *
 * The last index reached is stored in GlobalTrash -- presumably so the
 * compiler cannot discard the chase as dead code.
 */
static double TimeStride( int TestSize, int Stride, struct AccessCount NA )
{
  int p, iterations, loads, LowOrder;
  ticks start, finish;

  /* Give p a defined value: if NA.outer <= 0 the loop body never runs, */
  /* and the store to GlobalTrash below would read an indeterminate p.  */
  p = 0;

  if (HeartBeat > 1)
    fprintf(stderr,"-> StrideTest (%s, %s)\n",
            PrintNum(TestSize),PrintNum(Stride));

  iterations = NA.outer; 
  LowOrder   = NA.inner;

  Flush( TestSize );

  /* Timed region: keep it free of anything but the dependent loads. */
  start = getticks();
  while( iterations > 0)
  {
    loads = LowOrder;
    p = 0;
    while(loads > 0)
    {
      p = Array[p];   /* each load's address depends on the previous load */

      loads--;
    }
    iterations--;
  }
  finish = getticks();

  GlobalTrash = p;

  return elapsed(finish,start);
}

/* SetUpTrial -- (re)build the file-scope Array as a cyclic chain for one
 * stride.
 *
 *   Size    number of ints in the new Array.
 *   Stride  spacing, in ints, handed to GenerateLinearSet().
 *
 * A fresh Array is allocated and any previous one is released.  An index
 * set of Size / Stride entries is generated, shuffled, and then linked so
 * that each chosen slot of Array holds the index of the slot that precedes
 * it in shuffled order (the first one points at the last, closing the
 * cycle).  Slots of Array that are not in the index set are left unset.
 *
 * NOTE(review): the results of PACE_AllocMem() are used unchecked, as in
 * the original; confirm whether it can return NULL.
 * NOTE(review): TimeStride() starts its walk at index 0, so this relies on
 * GenerateLinearSet() including index 0 -- confirm against its definition.
 */
static void SetUpTrial( int Size, int Stride )
{
  int i, count, cur, prev;
  int *Index, *Old;

  /* Allocate the new buffer before releasing the old one (original order). */
  Old = Array;
  Array = (int *) PACE_AllocMem(Size * sizeof(int));
  if (Old != (int *) NULL)
  {
    PACE_FreeMem(Old);
    Old = (int *) NULL;
  }

  /* A non-positive stride would divide by zero; treat it as "no slots". */
  count = (Stride > 0) ? Size / Stride : 0;

  if (count < 1)
  {
    /* No stride-spaced slot fits, so Index[count-1] below would read out */
    /* of bounds.  Make slot 0 a self-loop instead, so that a walk from   */
    /* index 0 stays inside Array.                                        */
    if (Size > 0)
      Array[0] = 0;
    return;
  }

  Index = PACE_AllocMem(count * sizeof(int) );
  (void) GenerateLinearSet( Index, count,  Stride);
  (void) Shuffle(Index, count);

  /* Link the shuffled slots into a single cycle. */
  prev = Index[count-1];
  for (i=0;i<count;i++)
  { 
    cur = Index[i];
    Array[cur] = prev;
    prev = cur;
  }
  PACE_FreeMem((void*) Index);
}

/* FindGranularity -- estimate the line size ("granularity") of the cache
 * level whose capacity is CacheSize.
 *
 *   CacheSize  capacity of the level under test, in UnitSize units.
 *
 * Returns the stride chosen by AnalyzeGran() scaled by sizeof(int), i.e.
 * the granularity in bytes, or a negative value if AnalyzeGran() reports
 * that nothing was detected.
 *
 * Method: build a pointer-chasing chain over an array 1.5x the cache
 * size, grow the access count until one run takes at least MinTime, then
 * time the chase for strides MIN_STRIDE, 2*MIN_STRIDE, ... up to StrideUB
 * words, keeping the minimum over TRIALS_FOR_MIN sweeps.  Raw and
 * normalized times are written to LogFile.
 *
 * Side effects: overwrites the file-scope Strides[] and Times[] tables and
 * releases the file-scope Array before returning.  Calls Abort() when the
 * test array would exceed UPPERBOUNDINBYTES.
 */
int FindGranularity( int CacheSize )
{
  int i, j, k, TestSize, count, result, StrideUB, PageSize;
  int MaxPoints;
  double Trial;

  struct AccessCount NA;

  /* CacheSize is measured in UnitSize units.  The granularity code */
  /* works on sizeof(int) units, for historical reasons.  Thus, we  */ 
  /* need to convert CacheSize appropriately.                       */

  /* Never design a microbenchmark to use multiple sizes! The bugs  */
  /* that result are subtle.                                        */

  CacheSize = CacheSize * UnitSize;    /* convert to bytes */
  CacheSize = CacheSize / sizeof(int); /* convert to sizeof(int) s */

  TestSize = CacheSize + CacheSize / 2;

  if (TestSize > (UPPERBOUNDINBYTES / sizeof(int)))
  {
    /* The trailing space keeps "is" and "too" apart in the log. */
    fprintf(LogFile,"\nFindGranularity() invoked with a Size that is ");
    fprintf(LogFile,"too large.\nIncrease \"UPPERBOUNDINBYTES\",");
    fprintf(LogFile," recompile, and run the test again.\n");
    Abort("Test size too large for global array. See log file.\n",-1);
  }

  /* use a local copy of PageSize, adjusted to sizeof(int) */
  PageSize = GetOSPageSize() / sizeof(int);

  if (MAX_STRIDE >= PageSize)  /* only on very old systems */
    StrideUB = PageSize / 2;
  else
    StrideUB = MAX_STRIDE;

  /* We will run the stride from MIN_STRIDE to StrideUB words, time the results, 
   * and analyze them to see when the decline in spatial locality (indicated
   * by the slope of the piecewise linear curve) stops.
   */

  if (HeartBeat)
    fprintf(stderr,"Finding granularity at %s b\n",
            PrintNum(CacheSize*sizeof(int)));

  /* find the number of accesses needed to provide MinTime ticks */
  NA.outer = 1;
  NA.inner = TestSize; /* want to run through entire permutation, at least */

  SetUpTrial( TestSize, 1 );
  Trial = 0;

  /* Double the inner count until it reaches BigInt, then double the */
  /* outer count, until a unit-stride run lasts at least MinTime.    */
  while (Trial < MinTime)
  {
    if (NA.inner < BigInt)
      NA.inner = NA.inner + NA.inner;
    else
      NA.outer = NA.outer + NA.outer;

    Trial = TimeStride( TestSize, 1, NA );
  }

  fprintf(LogFile,"Granularity Tests @ %s b need (%s ; %s) accesses.\n",
          PrintNum(CacheSize*sizeof(int)),
          PrintNum(NA.outer),PrintNum(NA.inner));
  fprintf(LogFile,"Stride\tTime\n");

  /* Capacity of the file-scope result tables (both are declared with */
  /* the same length); the sweep must never write past it.            */
  MaxPoints = (int) (sizeof(Strides) / sizeof(Strides[0]));

  /* count stays 0 if no sweep runs, so the loops below are well defined */
  count = 0;

  /* now, run a sweep over the strides, timing the accesses */
  for (i=0; i< TRIALS_FOR_MIN; i++)
  {
    if (HeartBeat > 1)
      fprintf(stderr,"-> Stride sweep number %d.\n",i);

    j = 0;
    k = MIN_STRIDE;
    while (k <= StrideUB && j < MaxPoints)
    { 
      /* run the test */
      SetUpTrial( TestSize, k );

      Trial = TimeStride( TestSize, k, NA );
      if (i == 0)
      {
        /* first sweep records the stride and seeds the minimum */
        Times[j] = Trial;
        Strides[j] = k;
      }
      else if (Trial < Times[j])
        Times[j] = Trial;   /* later sweeps only ever lower the time */

      j++;
      k = k + k;            /* strides advance by doubling */
    }
    count = j;
  }

  if (Array != (int *) NULL)
  {
    PACE_FreeMem(Array);
    Array = (int *) NULL;
  }

  /* and write the results out to the Log File */
  for (i=0; i<count; i++)
  {
    fprintf(LogFile,"%s\t%.0f\n",PrintNum(Strides[i]),Times[i]);
  }

  /* Normalize to Cycles and write it to the Log, as well: scale to a */
  /* per-access figure, then express it in units of AddCostInNSec.    */
  for (i=0;i<count;i++)
  {
    Times[i] = Times[i] * 1000.0 / (double) NA.inner;
    Times[i] = Times[i] / (double) NA.outer;
    Times[i] = round( Times[i] / AddCostInNSec );
  }
 
  fprintf(LogFile,"\nNormalized to Cycles:\nStride\tTime\n");
  for (i=0;i<count;i++)
  {
    fprintf(LogFile,"%s\t%.0f\n",PrintNum(Strides[i]),Times[i]);
  }

  result = AnalyzeGran( Strides, Times, count );

  /* A negative result is the "not detected" sentinel; hand it back as */
  /* is rather than multiplying it by sizeof(int).                     */
  if (result < 0)
    return result;

  if (Verbose)
  {
    fprintf(LogFile,"Granularity at Level %s is %s b.\n",
            PrintNum(CacheSize*sizeof(int)),PrintNum(result*sizeof(int)));
  }

  return result * sizeof(int);
}


/* The rationale for this routine is simple.
 * The time increase with stride arises from a decrease in spatial
 * locality.
 * 
 * We walk the curve and look for the first point where the slope 
 * declines. The previous point should be the line size.
 *
 * The reality is a little more complex.  We look beyond the point
 * identified as line size to see if, in fact, the time keeps rising.
 *
 *
 */

/* AnalyzeGran -- pick the line size out of a stride/time curve.
 *
 *   Strides  strides that were timed, in sizeof(int) words.
 *   Times    Times[i] is the time measured for Strides[i].
 *   count    number of valid entries in both arrays.
 *
 * Walks the curve keeping a running average of the point-to-point time
 * increase, and selects the last point at which that average is at least
 * as large as the best seen so far (the best starts at 0).
 *
 * Returns the selected stride in words, or -1 if no point qualifies:
 * fewer than two points, or a running average that is always negative.
 * The outcome is written to LogFile, and to stderr when HeartBeat is set.
 */
static int AnalyzeGran( int Strides[], double Times[], int count )
{
  int i, best, result;
  double Delta, Sum, Avg, BestAvg;

  best = -1;
  Sum = 0;
  BestAvg = 0;

  for (i=1;i<count;i++) /* find max point for running average */
  {
    /* Only the current difference is needed, so a scalar replaces the */
    /* per-point array (a zero-length VLA is undefined when count==0). */
    Delta = Times[i] - Times[i-1];

    Sum += Delta;
    Avg = Sum / ((double)(i));

    if (Avg >= BestAvg)
    {
      BestAvg = Avg;
      best = i;
    }
  }

  if (best < 0)
  {
    /* Nothing selected: report it instead of indexing Strides[-1]. */
    fprintf(LogFile,"\"-> Line size not detected.\"\n");
    if (HeartBeat)
      fprintf(stderr," -> Line size not detected.\n");
    return -1;
  }

  result = Strides[best];
  fprintf(LogFile,"\"--> Line size is %s b (%s w).\"\n",
          PrintNum(result*sizeof(int)),PrintNum(result));

  if (HeartBeat)
    fprintf(stderr," -> Line size found to be %s b (%s w).\n",
            PrintNum(result*sizeof(int)),PrintNum(result));

  return result;
}

