/* Copyright 2011, Rice University.  All rights reserved.
   No warranty of usability express or implied.  Have a lovely day! */
#include "MemTest.h"
#include "BlackJackTimer.h"

/* Hand-written prototype for round(), which BCTrial uses when converting
 * times to cycle counts.  (The C library normally declares this in <math.h>.)
 */
double round( double x );

/* Scratch index sets used while building a permutation.  Both are allocated
 * at the start of BuildBCPerm and freed (and reset to NULL) before it returns.
 */
static int *ColumnIndexSet   = (int *) NULL;
static int *RowIndexSet      = (int *) NULL;

/* Index into MemArray of the element where the permutation cycle is closed.
 * Set by BuildBCPerm; passed to TimePermPtr by BCTest.
 */
static int StartingPoint     = -1;

/* NOTE(review): 'count' is not referenced by the functions below --
 * BuildBCPerm declares a local of the same name that hides it.  Confirm
 * whether it is still needed.
 */
static int count  = 0;

/* One-shot flags: each is incremented after BuildBCPerm logs the matching
 * warning so that the warning is not repeated.  'paramwarning' guards the
 * array/block ratio warning; 'complain' guards the "BlockSize is small
 * relative to Stride" message and is reset to 0 at the start of each BCTrial.
 */
static int paramwarning = 0;
static int complain = 0;

/* The array that holds the pointer permutation.  (Re)allocated by every call
 * to BuildBCPerm and released at the end of BCTrial.
 */
static void **MemArray = (void **) NULL;

/* Confirmation callback handed to IntAnalyze by BCTrial; defined at the end
 * of this file.
 */
static int AlwaysConfirm( int s );

/* BuildBCPerm
 *
 * Constructs, in the file-scope array MemArray, a block cyclic permutation:
 * a single cycle of pointers in which every visited element of MemArray
 * holds the address of another element of MemArray.  Also records, in
 * StartingPoint, the index of the element at which the cycle is closed.
 *
 * PARAMETERS: ArraySize  -- footprint of the array, in words (see BCTest)
 *             BlockSize  -- size of one block (one "row"), in words
 *             Stride     -- distance, in words, between the columns used
 *
 * RETURNS: void
 *
 * SIDE EFFECTS: allocates a new MemArray and frees the previous one;
 *   allocates and frees ColumnIndexSet and RowIndexSet; writes warnings
 *   to LogFile and stderr; calls Abort() when the parameters cannot
 *   describe at least one row and one column.
 *
 * If ArraySize is not NColumns * NRows * Stride, BlockSize is halved until
 * it divides ArraySize, and the row and column counts are then recomputed
 * from the adjusted BlockSize so that the permutation that is built matches
 * the block size that is reported in the log.
 *
 */

void BuildBCPerm( int ArraySize, /* see BCTest */
		  int BlockSize, /* see BCTest */
		  int Stride     /* see BCTest */ )
{
  int i, j, col, ThisElt, LastElt, CycleLen;

  int NColumns = BlockSize / Stride;
  int NRows    = ArraySize / BlockSize;

  void **Old, **p;

  if (ArraySize != NColumns*NRows*Stride)
  {
    /* The detailed explanation is logged only once per run. */
    if (paramwarning == 0)
    {
      fprintf(stderr,
	      "\nBuildBCPerm: problem with ratio of array and block sizes.");
      fprintf(stderr," See log file.\n\n");

      fprintf(LogFile,"\n\"BuildBCPerm: parameters have wrong ratio to elicit ");
      fprintf(LogFile,"the desired effect.\"\n");
      fprintf(LogFile,
           "\"Array size (%s) != NColumns (%s) x NRows (%s) x Stride (%s).\"\n",
	      PrintNum(ArraySize),PrintNum(NColumns),PrintNum(NRows),
	      PrintNum(Stride));
      fprintf(LogFile,"\"[NColumns (%s) x NRows (%s) x Stride (%s) = %s.]\"\n",
	      PrintNum(NColumns),PrintNum(NRows),PrintNum(Stride),
	      PrintNum(NColumns*NRows*Stride));
      fprintf(LogFile,"\n\"Some block sizes may be reset.\"\n");
      fprintf(LogFile,"\"Results may show incorrect behavior.\"\n\n");
      paramwarning++;
    }

    /* Halve BlockSize until it divides ArraySize evenly. */
    i = BlockSize;
    while((ArraySize / BlockSize) * BlockSize != ArraySize)
    {
      BlockSize = BlockSize / 2;
    }

    if (i != BlockSize)
    {
      fprintf(LogFile,
	    "\"Block size reset: array size %s b, block size %s b, ",
	    PrintNum(ArraySize),PrintNum(i));
      fprintf(LogFile,"new block size is %s b.\"\n",PrintNum(BlockSize));

      /* The row and column counts were derived from the old BlockSize;
       * recompute them so that the reset actually takes effect.
       */
      NColumns = BlockSize / Stride;
      NRows    = ArraySize / BlockSize;
    }
  }

  if (NColumns < 8 && complain == 0)
  {
    fprintf(LogFile,"\"BlockSize (%s b) is small relative to Stride (%s b).\"\n",
	    PrintNum(BlockSize*UnitSize),PrintNum(Stride*UnitSize));
    complain++;
  }

  if (NRows < 1)
  {
    fprintf(LogFile,"\"BlockSize (%s w) is larger than ArraySize (%s w).\"\n",
	    PrintNum(BlockSize),PrintNum(ArraySize));
    Abort("Structural problem in use of BuildBCPerm, see logfile.",-1);
  }

  /* With no columns there is no element to start the cycle from:
   * ColumnIndexSet[0] would be read from an empty allocation.
   */
  if (NColumns < 1)
  {
    fprintf(LogFile,"\"BlockSize (%s w) is smaller than Stride (%s w).\"\n",
	    PrintNum(BlockSize),PrintNum(Stride));
    Abort("Structural problem in use of BuildBCPerm, see logfile.",-1);
  }

  ColumnIndexSet = PACE_AllocMem( NColumns * sizeof(int) );
  RowIndexSet    = PACE_AllocMem( NRows * sizeof(int) );

  /* Columns are visited in shuffled order; rows are left in linear order. */
  (void) GenerateLinearSet( ColumnIndexSet, NColumns, Stride );
  (void) Shuffle( ColumnIndexSet, NColumns );

  (void) GenerateLinearSet( RowIndexSet, NRows, 1 );

  /* Allocate the array.  The new array is obtained before the old one
   * is released.
   */
  Old      = MemArray;
  MemArray = PACE_AllocMem(ArraySize * UnitSize);
  if (Old != (void **) NULL)
  {
    PACE_FreeMem(Old);
    Old = (void **) NULL;
  }

  /*  Assemble the permutation ...
   *
   *  The right way to think of this permutation is as an array
   *  in row major order.  The first index set picks columns at
   *  random, separated by Stride.  The second index set picks
   *  the rows, with unit stride among rows.  (E.g., we use
   *  all the rows but space among the columns by Stride to avoid
   *  spatial locality.  Spatial locality is bad for the measurements
   *  because it simply decreases the magnitude of a transition.)
   *
   *  The starting point for the permutation is
   *
   *     MemArray[RowIndexSet[0]][ColumnIndexSet[0]]
   *
   *  We link together the elements of MemArray in the permutation
   *  by choosing a column (ColumnIndexSet[i]) and linking together
   *  the row entries for that column in the order dictated by
   *  RowIndexSet[0 ... NRows-1].  We link the last one of those to
   *  the first element of the next column, and continue.
   *
   *  To simplify the loop (and complicate the explanation), we link
   *  element a to element b by making MemArray[b] contain the address
   *  of a.  Thus, when the permutation is walked, it is walked in the
   *  opposite of the order in which it is constructed.  (In other
   *  words, the walk moves from b to a rather than from a to b.)
   *
   */

  LastElt = -1;
  for (i=0;i<NColumns;i++)
  {
    col = ColumnIndexSet[i];
    for (j=0;j<NRows;j++)
    {
      ThisElt = col + (RowIndexSet[j] * NColumns * Stride);

      /* The very first element has no predecessor yet.  Store NULL as a
       * placeholder rather than forming the out-of-range address
       * &MemArray[-1]; the slot is overwritten when the cycle is closed.
       */
      if (LastElt < 0)
      {
	MemArray[ThisElt] = (void *) NULL;
      }
      else
      {
	MemArray[ThisElt] = &MemArray[LastElt];  /* MemArray[j][i] <- LastElt */
      }

      if (Debug>1)
      {
	fprintf(LogFile,"M[%s] <- %s.\t\t(%d,%d)\t%d + (%d * %d)\n",
		PrintNum(ThisElt),PrintNum(LastElt),j,i,
		col,RowIndexSet[j],NColumns*Stride);
      }
      LastElt = ThisElt;
    }
  }

  /* and, finally, close the cycle: the first element built receives the
   * address of the last element built.
   */
  StartingPoint = ColumnIndexSet[0] + (RowIndexSet[0] * NColumns * Stride);
  MemArray[StartingPoint] = &MemArray[LastElt];
  if (Debug>1)
  {
    fprintf(LogFile,"M[%s] <- %s. ** starting point **\n",
	    PrintNum(StartingPoint),PrintNum(LastElt));
  }

  /* Verify the permutation: follow the pointers from the starting point
   * and make sure we get back to it within NColumns * NRows steps.
   */
  p = MemArray[StartingPoint];
  j = 0;
  CycleLen = NColumns * NRows;
  while (p != &MemArray[StartingPoint])
  {
    p = *p;
    if (j++ > CycleLen)
    {
      fprintf(stderr,"Cycle did not return to starting point.\n");
      fprintf(stderr,"Cycle length is %s of %s.\n",
	      PrintNum(j),PrintNum(CycleLen));
      break;
    }
  }
  if ((Debug) && (j == CycleLen-1))
  {
    fprintf(stderr,"Maximal length permutation.\n");
  }

  PACE_FreeMem(ColumnIndexSet);
  ColumnIndexSet = (int *) NULL;

  PACE_FreeMem(RowIndexSet);
  RowIndexSet = (int *) NULL;
}



/* BCTest
 *
 * Conducts a single block-cyclic memory test: rebuilds the permutation
 * in MemArray with BuildBCPerm, then hands MemArray and StartingPoint
 * to TimePermPtr to be timed.
 *
 * PARAMETERS:  ArraySize, BlockSize, Stride -- passed to BuildBCPerm
 *              NA                           -- passed to TimePermPtr
 *
 * RETURNS:     The value produced by TimePermPtr: an elapsed time,
 *              in microseconds, as a double
 *
 * When HeartBeat is greater than 1, a one-line progress report for the
 * trial (size in bytes, then the time) is written to stderr.
 *
 */

double BCTest( int ArraySize,  /* footprint for test, in WORDS            */
	       int BlockSize,  /* size of each randomized block, in WORDS */
	       int Stride,     /* distance between unrandomized accesses  */
	       struct AccessCount NA          /* number of iterations     */
	       )
{
  double Elapsed;

  if (HeartBeat > 1)
  {
    fprintf(stderr,"Trial @ %s b: ",PrintNum(ArraySize*UnitSize));
  }

  /* Lay out a fresh permutation, then time a walk over it. */
  BuildBCPerm(ArraySize,BlockSize,Stride);
  Elapsed = TimePermPtr(MemArray, StartingPoint, NA);

  if (HeartBeat > 1)
  {
    fprintf(stderr,"%s usec\n",PrintDNum(Elapsed));
  }

  return Elapsed;
}

/* BCTrial
 *
 * PARAMETERS: Sizes[], Times[] Size
 *
 */

void BCTrial ( int    Sizes[],    /* Array of trial sizes to run */
	       double Times[],    /* Minimum times from the runs */
	       int    Cycles[],   /* Times, converted to cycles  */
	       int    Count,      /* number of entries in arrays */
	       int    BlockSize,  /* Block size for the test     */
	       int    Stride )    /* Access stride for the test  */
{
  int i, j, k, m, NotDone, BigInt, SaveHeartBeat;
  int NColumns, NRows;
  double Trial, Events;

  int Counters[Count];

  struct AccessCount NAccesses;

  NColumns = BlockSize / Stride;
  NRows    = Sizes[0]  / BlockSize;  

  if (Sizes[1] != NColumns*NRows*Stride)
  {
    i = BlockSize;
    while((Sizes[1] / BlockSize) * BlockSize != Sizes[1])
    {
      BlockSize = BlockSize / 2;
    }
    if (i != BlockSize)
    {
      fprintf(stderr,
	      "\nBCTrial invoked with a ratio of array and block sizes\n");
      fprintf(stderr,"that does not work with the permutation.\n");
      fprintf(stderr,"-> BlockSize reset from %s b to %s b.\n\n",
	      PrintNum(i*UnitSize),PrintNum(BlockSize*UnitSize));
    }
  }  

  complain = 0;
  SaveHeartBeat = HeartBeat;
  HeartBeat = 0;
  
  FindNA( Sizes[0], BlockSize, Stride, &NAccesses );

  HeartBeat = SaveHeartBeat;

  fprintf(LogFile,"\n\"Block Cyclic Test of %s points between %s b to %s b.\"\n",
	  PrintNum(Count), PrintNum(Sizes[0]*UnitSize),
	  PrintNum(Sizes[Count-1]*UnitSize));
  fprintf(LogFile,"\"Blocks of %s b, Stride of %s b.\"\n",
	  PrintNum(BlockSize*UnitSize),PrintNum(Stride*UnitSize));
  fprintf(LogFile,"\"( %s ; %s ) accesses.\"\n",
	  PrintNum(NAccesses.outer),PrintNum(NAccesses.inner));
  fprintf(LogFile,"\nSize\tTime\n");

  for (i=0; i<Count; i++)
  {
    Counters[i] = TRIALS_FOR_MIN;
  }

  /* run the trial until every point has a "good" time */
  NotDone = 1;
  i = 1;
  k = 0;
  m = 4;
  while(NotDone)
  {
    if (HeartBeat)
      fprintf(stderr,"Starting BC Test series %2d.",i);

    NotDone = 0;
    for (j=0; j<Count; j++)
    {
      if (Counters[j])
      {
	/*fprintf(stderr,"%15s ",PrintNum(Sizes[j]*UnitSize));*/
	m++;
        if (m == 4)
	{
	  /*  fprintf(stderr,"\n"); */
	  m = 0;
	}
	Trial = BCTest(Sizes[j],BlockSize,Stride,NAccesses);
	k++;
	if (i==1)
	  Times[j] = Trial;
	else if (Trial < Times[j])
	{
	  Times[j] = Trial;
	  Counters[j] = TRIALS_FOR_MIN;
	}
	else
	{
	  Counters[j]--;
	}
	if (Counters[j])
	  NotDone = 1;
      }
    }
    if (HeartBeat)
      fprintf(stderr," Tested %d points.\n",k);
    k = 0; i++;
  }

  if (MemArray != (void **) NULL)
  {
    PACE_FreeMem(MemArray);
    MemArray = (void **) NULL;
  }

  /* Convert to Cycles */
  if (NAccesses.outer == 1)  /* need to know the number of accesses we made */
    Events = (double) NAccesses.inner;
  else 
    Events = (((double) NAccesses.outer) - 1.0) * (double) BigInt 
           + (double) NAccesses.inner;

  for (i=0; i<Count; i++)
  {
    /* Times is in Microseconds, AddCost is in Nanoseconds */
    Cycles[i] = round(1000.0 * Times[i] / (Events * AddCost));
  }

  for (i=0; i<Count; i++)
  {
    fprintf(LogFile,"%s\t%s\t%d\n",
	    PrintNum(Sizes[i]*UnitSize),PrintDNum(Times[i]),
	    Cycles[i]);
  }
  IntAnalyze( Sizes, Cycles, Count, "BC", /* smooth */ 1, AlwaysConfirm );
}

/* FindNA
 *
 * Chooses the access counts for a test.  Starting from NA->outer = 1 and
 * NA->inner = 2 * (Size / Stride), bumped up to a multiple of 10, the
 * counts are doubled and BCTest is re-run until one run takes at least
 * MinTime.  NA->inner is the one doubled while it is below BigInt;
 * after that NA->outer is doubled instead.
 *
 * PARAMETERS: Size, BlockSize, Stride -- passed through to BCTest
 *             NA                      -- receives the chosen counts
 *
 * RETURNS: void (the result is written through NA)
 *
 */
void FindNA( int Size, int BlockSize, int Stride, 
		      struct AccessCount *NA)
{
  double Elapsed;

  if (Verbose > 1)
  {
    fprintf(LogFile,"FindNA( %s, %s, %s (? ; ?)\n",
	    PrintNum(Size),PrintNum(BlockSize),PrintNum(Stride));
  }

  NA->outer = 1;
  NA->inner = 2 * (Size / Stride);

  /* TimePerm wants NA->inner to be a multiple of 10.  Note that this
   * always moves up to the next multiple, even from an exact multiple.
   */
  NA->inner += 10 - (NA->inner % 10);

  /* Each pass doubles one of the counts first and only then times a run,
   * so the initial counts themselves are never timed.
   */
  for (Elapsed = 0;
       Elapsed < MinTime;
       Elapsed = BCTest(Size, BlockSize, Stride, *NA))
  {
    if (NA->inner >= BigInt)
    {
      NA->outer += NA->outer;
    }
    else
    {
      NA->inner += NA->inner;
    }
  }

  if (Verbose > 1)
  {
    fprintf(LogFile,"->FindNA returns (%s ; %s).\n",
	    PrintNum(NA->outer),PrintNum(NA->inner));
  }
}


/* AlwaysConfirm
 *
 * Confirmation callback that BCTrial passes to IntAnalyze.  It ignores
 * its argument and answers "yes" (1) unconditionally.
 */
static int AlwaysConfirm( int s )
{
  (void) s;   /* the argument plays no part in the answer */

  return 1;
}
