/* Copyright 2011, Rice University.  All rights reserved.
   No warranty of usability express or implied.  Have a lovely day! */
#include "MemTest.h"
#include "BlackJackTimer.h"

/* Scratch index arrays.  BuildBCPerm allocates both, fills them, and
 * frees them (resetting the pointers to NULL) before it returns;
 * IsItACache also frees them on exit if they are still non-NULL. */
static int *ColumnIndexSet   = (int *) NULL;
static int *RowIndexSet      = (int *) NULL;
/* Index into MemArray at which the current permutation starts.  Set by
 * BuildBCPerm and handed to TimePermPtr by BCTest; -1 until a
 * permutation has been built. */
static int StartingPoint     = -1;

/* Not referenced by any function in this part of the file. */
static int count  = 0;
/* Nonzero once BuildBCPerm has logged its "wrong ratio" parameter
 * warning, so that the warning is written only once. */
static int paramwarning = 0;
/* Nonzero once BuildBCPerm has logged its "BlockSize is small relative
 * to Stride" notice; BCTrial resets it to 0 at the start of each trial. */
static int complain = 0;

/* Holds the previous MemArray while BuildBCPerm allocates its
 * replacement; the old array is freed only after the new one exists. */
static void **Old      = (void **) NULL;
/* The permutation under test: each linked element stores the address of
 * another element of this same array.  Allocated by BuildBCPerm, freed
 * by BCTrial and IsItACache. */
static void **MemArray = (void **) NULL;

/* Forward declarations for the file-local helpers defined below. */
static void   FindNA( int s, int bs, int st, struct AccessCount *NA );
static void   BuildBCPerm( int as, int bs, int st );
static double BCTest( int as, int bs, int st, struct AccessCount NA );
static void   BCTrial( int S[], double T[], int c, int bs, int st );

/* BuildBCPerm
 *
 * Constructs, in global array MemArray, a block cyclic permutation
 *
 * PARAMETERS: ArraySize, BlockSize, Stride  (see BCTest)
 *
 * RETURNS: void
 *
 * SIDE EFFECTS: allocates a new MemArray of ArraySize * UnitSize bytes
 * and frees the previous one (if any); sets StartingPoint; allocates
 * and frees ColumnIndexSet and RowIndexSet; may set paramwarning and
 * complain; writes diagnostics to LogFile and stderr.
 *
 * Calls Abort when the parameters cannot describe a permutation:
 * a non-positive BlockSize or Stride, BlockSize < Stride (no columns),
 * or BlockSize > ArraySize (no rows).
 *
 */

static void BuildBCPerm( int ArraySize, /* see BCTest */
                         int BlockSize, /* see BCTest */
                         int Stride     /* see BCTest */ )
{
  int i, j, row, ThisElt, LastElt, NElts;
  int NColumns, NRows;
  void **p;

  /* Both values are used as divisors below, so reject them up front
   * instead of dividing by zero. */
  if (Stride < 1 || BlockSize < 1)
  {
    fprintf(LogFile,
            "\"BuildBCPerm: BlockSize (%d w) and Stride (%d w) must be positive.\"\n",
            BlockSize, Stride);
    Abort("Structural problem in use of BuildBCPerm, see logfile.",-1);
    return;  /* defensive, in case Abort returns */
  }

  NColumns = BlockSize / Stride;
  NRows    = ArraySize / BlockSize;

  if (ArraySize != NColumns*NRows*Stride)
  {
    if (paramwarning == 0)
    {
      fprintf(LogFile,"\n\"BuildBCPerm: parameters have wrong ratio to elicit ");
      fprintf(LogFile,"the desired effect.\"\n");
      fprintf(LogFile,
           "\"Array size (%s) != NColumns (%s) x NRows (%s) x Stride (%s).\"\n",
              PrintNum(ArraySize*UnitSize),PrintNum(NColumns),
              PrintNum(NRows), PrintNum(Stride*UnitSize));

      fprintf(LogFile,"*** In this test, it does not matter ***\n\n");
      paramwarning++;
    }

    /* Halve BlockSize until it divides ArraySize evenly.  NColumns and
     * NRows are NOT recomputed, so the adjusted value only shows up in
     * the log messages below. */
    while((ArraySize / BlockSize) * BlockSize != ArraySize)
    {
      BlockSize = BlockSize / 2;
    }
  }

  if (NColumns < 8 && complain == 0)
  {
    fprintf(LogFile,"\"BlockSize (%s w) is small relative to Stride (%s w).\"\n",
            PrintNum(BlockSize),PrintNum(Stride));
    /* Abort("Structural problem with BlockSize and Stride.",-1); */
    complain++;
  }

  /* With no columns, ColumnIndexSet would be empty and the code below
   * would read ColumnIndexSet[0] and store &MemArray[-1]. */
  if (NColumns < 1)
  {
    fprintf(LogFile,"\"Stride (%s w) is larger than BlockSize (%s w).\"\n",
            PrintNum(Stride),PrintNum(BlockSize));
    Abort("Structural problem in use of BuildBCPerm, see logfile.",-1);
    return;  /* defensive, in case Abort returns */
  }

  if (NRows < 1)
  {
    fprintf(LogFile,"\"BlockSize (%s w) is larger than ArraySize (%s w).\"\n",
            PrintNum(BlockSize),PrintNum(ArraySize));
    Abort("Structural problem in use of BuildBCPerm, see logfile.",-1);
    return;  /* defensive, in case Abort returns */
  }

  ColumnIndexSet = PACE_AllocMem( NColumns * sizeof(int) );
  RowIndexSet    = PACE_AllocMem( NRows * sizeof(int) );

  (void) GenerateLinearSet( ColumnIndexSet, NColumns, Stride );
  (void) Shuffle( ColumnIndexSet, NColumns );

  (void) GenerateLinearSet( RowIndexSet, NRows, 1 );

  /*  (void) Shuffle( RowIndexSet, NRows, 1 ); */

  /* Allocate the new array before releasing the old one. */
  Old = MemArray;
  MemArray = PACE_AllocMem(ArraySize * UnitSize);
  if (Old != (void **) NULL)
  {
    PACE_FreeMem(Old);
    Old = (void **) NULL;
  }

  /*  Assemble the permutation ...
   *
   *  The right way to think of this permutation is as an array
   *  in row major order, with rows of NColumns * Stride elements.
   *  The column index set picks columns at random, separated by
   *  Stride; the row index set covers every row with unit stride
   *  (its shuffle is currently disabled above).  Spacing the columns
   *  by Stride avoids spatial locality, which is bad for the
   *  measurements because it simply decreases the magnitude of a
   *  transition.
   *
   *  To reduce TLB effects, the traversal runs along one row at a
   *  time: every column of a row, in ColumnIndexSet order, is linked
   *  before moving on to the next row in RowIndexSet order.
   *
   *  The starting point for the permutation is
   *
   *     MemArray[RowIndexSet[0]][ColumnIndexSet[0]]
   *
   *  To simplify the loop (and complicate the explanation), we link
   *  element a to element b by making MemArray[b] contain the address
   *  of a.  Thus, a walk that follows the pointers visits the elements
   *  in the opposite of the order in which they are linked here.  (In
   *  other words, it will move from b to a rather than from a to b.)
   */

  LastElt = -1;
  for (i=0;i<NRows;i++)
  {
    row = RowIndexSet[i] * NColumns *Stride;
    for (j=0;j<NColumns;j++)
    {
      ThisElt = row + ColumnIndexSet[j];
      /* The very first element has no predecessor yet; it is linked
       * after the loop, which closes the cycle. */
      if (LastElt != -1)
        MemArray[ThisElt] = &MemArray[LastElt];
      if (Debug>1)
        fprintf(LogFile,"M[%s] <- %s.\t\t(%d,%d)\t(%d * %d) + %d\n",
                PrintNum(ThisElt),PrintNum(LastElt),i,j,
                RowIndexSet[i],NColumns*Stride,ColumnIndexSet[j]);
      LastElt = ThisElt;
    }

  }
  /* and, finally, close the cycle: first element <- last element */
  StartingPoint = ColumnIndexSet[0] + (RowIndexSet[0] * NColumns * Stride);
  MemArray[StartingPoint] = &MemArray[LastElt];
  if (Debug>1)
    fprintf(LogFile,"M[%s] <- %s. ** starting point **\n",
            PrintNum(StartingPoint),PrintNum(LastElt));

  /* verify permutation: follow the pointers until we are back at the
   * starting element, giving up after more steps than there are
   * linked elements. */
  p = MemArray[StartingPoint];
  i = 0;
  NElts = NColumns * NRows;
  while (p != &MemArray[StartingPoint])
  {
    p = *p;
    i++;
    if (i > NElts)
    {
      fprintf(stderr,"Cycle did not return to starting point.\n");
      fprintf(stderr,"Cycle length is %s of %s.\n",
              PrintNum(i),PrintNum(NElts));
      break;
    }
  }
  /* p started one hop past the starting element, so a cycle through
   * all NElts elements takes NElts-1 further hops. */
  if ((Debug) && (i == NElts-1))
    fprintf(stderr,"Maximal length permutation.\n");

  PACE_FreeMem(ColumnIndexSet);
  ColumnIndexSet = (int *) NULL;

  PACE_FreeMem(RowIndexSet);
  RowIndexSet = (int *) NULL;
}

/* BCTest
 *
 * Runs one block-cyclic memory test: rebuilds the permutation in
 * MemArray via BuildBCPerm, then times a walk over it with
 * TimePermPtr, beginning at StartingPoint.
 *
 * PARAMETERS:  ArraySize, BlockSize, Stride, NA
 *
 * RETURNS:     The value produced by TimePermPtr, as a double
 *              (reported as "usec" in the heartbeat output)
 *
 */

static double BCTest( int ArraySize,  /* footprint for test, in WORDS   */
                      int BlockSize,  /* size of each randomized block  */
                      int Stride,     /* distance between accesses      */
                      struct AccessCount NA /* number of iterations     */
               )
{
  double elapsed;

  /* Progress line, opened here and completed after the timing run. */
  if (HeartBeat > 1)
  {
    fprintf(stderr,"Trial @ %s b: ",PrintNum(ArraySize*UnitSize));
  }

  /* Fresh permutation for every trial. */
  BuildBCPerm(ArraySize,BlockSize,Stride);

  /* Timed walk over the permutation. */
  elapsed = TimePermPtr(MemArray, StartingPoint, NA);

  if (HeartBeat <= 1)
  {
    return elapsed;
  }

  fprintf(stderr,"%s usec\n",PrintDNum(elapsed));
  return elapsed;
}

/* BCTrial
 *
 * Runs BCTest repeatedly over a list of array sizes and records, for
 * each size, the smallest time observed.  A size is retired once
 * TRIALS_FOR_MIN trials have failed to improve on its minimum; any
 * improvement after the first pass resets that size's budget.
 *
 * PARAMETERS: Sizes[], Times[], Count, BlockSize, Stride
 *
 * SIDE EFFECTS: clears the globals complain and HeartBeat, and frees
 * MemArray before returning.
 *
 */

static void BCTrial ( int    Sizes[],    /* Array of trial sizes to run */
               double Times[],    /* Minimum times from the runs */
               int    Count,      /* number of entries in Sizes & Times */
               int    BlockSize,  /* Block size for the test     */
               int    Stride )    /* Access stride for the test  */
{
  int Remaining[Count];   /* per-size budget of non-improving trials */
  int pass, slot, tested, pending;
  double elapsed;
  struct AccessCount NAccesses;

  complain = 0;
  HeartBeat = 0;

  /* Calibrate the access counts against the first size only. */
  FindNA( Sizes[0], BlockSize, Stride, &NAccesses );

  if (Debug>1)
  {
    fprintf(LogFile,"\nNOTLB Test of %s points between %s b to %s b.\n",
            PrintNum(Count), PrintNum(Sizes[0]*UnitSize),
            PrintNum(Sizes[Count-1]*UnitSize));
    fprintf(LogFile,"Blocks of %s b, Stride of %s b.\n",
            PrintNum(BlockSize*UnitSize),PrintNum(Stride*UnitSize));
    fprintf(LogFile,"( %s ; %s ) accesses.\n",
            PrintNum(NAccesses.outer),PrintNum(NAccesses.inner));
  }

  for (slot = 0; slot < Count; slot++)
    Remaining[slot] = TRIALS_FOR_MIN;

  /* Sweep over all still-active sizes until none remains active. */
  pass = 1;
  do
  {
    if (HeartBeat)
      fprintf(stderr,"Starting NOTLB series %2d.",pass);

    pending = 0;
    tested  = 0;
    for (slot = 0; slot < Count; slot++)
    {
      if (Remaining[slot] == 0)
        continue;               /* this size is already settled */

      elapsed = BCTest(Sizes[slot],BlockSize,Stride,NAccesses);
      tested++;

      if (pass == 1)
      {
        /* First pass just seeds the minimum. */
        Times[slot] = elapsed;
      }
      else if (elapsed < Times[slot])
      {
        /* New minimum: record it and restart the budget. */
        Times[slot] = elapsed;
        Remaining[slot] = TRIALS_FOR_MIN;
      }
      else
      {
        Remaining[slot]--;
      }

      if (Remaining[slot] > 0)
        pending = 1;
    }

    if (HeartBeat)
      fprintf(stderr," Tested %2d points.\n",tested);

    pass++;
  } while (pending);

  if (Debug > 1)
  {
    fprintf(LogFile,"\nSize\tTime\n");
    for (slot = 0; slot < Count; slot++)
      fprintf(LogFile,"%s\t%s\n",
              PrintNum(Sizes[slot]*UnitSize),PrintDNum(Times[slot]));
  }

  if (MemArray != (void **) NULL)
  {
    PACE_FreeMem(MemArray);
    MemArray = (void **) NULL;
  }
}

/* FindNA
 *
 * Chooses access counts (NA->outer, NA->inner) large enough that one
 * BCTest run at the given Size takes at least MinTime.
 *
 * Starts from outer = 1 and inner = 2 * (Size / Stride) bumped up to
 * the next multiple of 10, then keeps doubling -- inner while it is
 * below BigInt, outer after that -- re-timing after every doubling.
 * The counts are doubled once before the first timing run.
 *
 * PARAMETERS: Size, BlockSize, Stride, NA (result, written in place)
 *
 * RETURNS: void
 *
 */
static void FindNA( int Size, int BlockSize, int Stride,
                    struct AccessCount *NA)
{
  double elapsed;

  if (Verbose > 1)
  {
    fprintf(LogFile,"FindNA( %s, %s, %s (? ; ?)\n",
            PrintNum(Size),PrintNum(BlockSize),PrintNum(Stride));
  }

  NA->outer = 1;
  NA->inner = 2 * (Size / Stride);

  /* TimePerm needs NA->inner to be an integral multiple of 10 */
  NA->inner = (NA->inner + 10) - (NA->inner % 10);

  for (elapsed = 0;
       elapsed < MinTime;
       elapsed = BCTest(Size, BlockSize, Stride, *NA))
  {
    if (NA->inner < BigInt)
    {
      NA->inner *= 2;
    }
    else
    {
      NA->outer *= 2;
    }
  }

  if (Verbose > 1)
  {
    fprintf(LogFile,"->FindNA returns (%s ; %s).\n",
            PrintNum(NA->outer),PrintNum(NA->inner));
  }
}

/* IsItACache
 *
 * Simple test to confirm or deny behavior at a suspect size.
 *
 * For each stride tested, times three footprints -- just below the
 * suspect size, the suspect size itself, and just above it -- and
 * compares the time increase above the suspect point (t2) with the
 * increase below it (t1).  The suspect is confirmed only if, for every
 * stride, either t2/t1 exceeds CorDThreshold or t1 was clamped to 1.0
 * while t2 exceeds fmax(TimerTick * 0.1, 2).
 *
 * Strides tested are InitStride, then 2*InitStride, 4*InitStride,
 * 6*InitStride, ... up to MaxStride, capped at MAX_TESTS_PER_RUN
 * strides (the capacity of the local result arrays).
 *
 * PARAMETERS: SuspectSize, InitStride, MaxStride
 *
 * RETURNS: 1 if confirmed, 0 otherwise.  If no stride is tested at all
 *          (InitStride > MaxStride) the result is 1.
 *
 * SIDE EFFECTS: HeartBeat is forced to 0 during the test and restored
 * on exit; ColumnIndexSet, RowIndexSet and MemArray are freed.
 */
int IsItACache( int SuspectSize, int InitStride, int MaxStride )
{
  int Sizes[3];
  int Strides[MAX_TESTS_PER_RUN];
  double Times[MAX_TESTS_PER_RUN][3];

  int InitBlockSize = PageSize;

  int Stride, Confirm, NPages;
  int i, j, nTests, SavedHeartBeat;
  double t1, t2, ratio;

  if (HeartBeat)
    fprintf(stderr,"CorD: Testing suspect point @ %sb with strides %s to %s.\n",
            PrintNum(SuspectSize*UnitSize),PrintNum(InitStride),
            PrintNum(MaxStride));

  SavedHeartBeat = HeartBeat;
  HeartBeat = 0;

  fprintf(LogFile,
    "\"CorD: Testing suspect point @ %s b with strides %s b to %s b.\"\n",
            PrintNum(SuspectSize*UnitSize),PrintNum(InitStride*UnitSize),
            PrintNum(MaxStride*UnitSize));

  NPages = SuspectSize / PageSize;
  fprintf(LogFile,"\nIsItACache( %s b ) testing %s pages.\n",
          PrintNum(SuspectSize*UnitSize),PrintNum(NPages));

  /* Bracket the suspect size symmetrically: log2(NPages) pages either
   * side for small footprints, 16 pages either side otherwise.
   * NOTE(review): NPages == 0 (SuspectSize < PageSize) makes log2
   * return -infinity here -- confirm callers never pass such a size. */
  if (NPages < 256)
  {
    Sizes[0] = SuspectSize - log2(NPages) * PageSize;
    Sizes[1] = SuspectSize;
    Sizes[2] = SuspectSize + log2(NPages) * PageSize;
  }
  else
  {
    Sizes[0] = SuspectSize - 16 * PageSize;
    Sizes[1] = SuspectSize;
    Sizes[2] = SuspectSize + 16 * PageSize;
  }

  /* Never run more tests than Strides[] and Times[] can hold. */
  nTests = 0;
  Stride = InitStride;
  while (Stride <= MaxStride && nTests < MAX_TESTS_PER_RUN)
  {
    BCTrial(Sizes, &Times[nTests][0], 3, InitBlockSize, Stride);
    Strides[nTests] = Stride;
    nTests++;
    Stride = 2 * nTests * InitStride;
  }

  if (Stride <= MaxStride)
    fprintf(LogFile,
            "\"CorD: stopped after %d strides; larger strides were not tested.\"\n",
            nTests);

  Confirm = 1;
  for (i=0;i<nTests;i++)
  {
    /* Clamp both deltas to at least 1.0 so the ratio is well defined. */
    t1 = fmax(Times[i][1] - Times[i][0],1.0);
    t2 = fmax(Times[i][2] - Times[i][1],1.0);
    ratio = t2 / t1;

    if (Debug>1)
    {
      for (j=0;j<3;j++)
        fprintf(LogFile,"%12s\t%10s\n",
                PrintNum(Sizes[j]*UnitSize),PrintDNum(Times[i][j]));

      fprintf(LogFile,"@ Stride %6s b, ratio is %f / %f = %f.\n",
              PrintNum(Strides[i]*UnitSize),t2, t1, ratio);
    }

    /* A single stride that fails the criterion denies the suspect. */
    if (!((t1 == 1.0 && t2 > fmax(TimerTick * 0.1,2)) ||
          ratio > CorDThreshold))
      Confirm = 0;
  }

  if (ColumnIndexSet != (int *) NULL)
  {
    PACE_FreeMem(ColumnIndexSet);
    ColumnIndexSet = (int*) NULL;
  }

  if (RowIndexSet != (int *) NULL)
  {
    PACE_FreeMem(RowIndexSet);
    RowIndexSet = (int*) NULL;
  }

  if (MemArray != (void **) NULL)
  {
    PACE_FreeMem(MemArray);
    MemArray = (void**) NULL;
  }

  HeartBeat = SavedHeartBeat;
  return Confirm;
}

