/* Copyright 2011, Rice University.  All rights reserved.
   No warranty of usability express or implied.  Have a lovely day! */
#include "MemTest.h"


/* Monotone -- force Data[0..Count-1] to be monotone nondecreasing.
 *
 * Walks the array left to right; any element smaller than its
 * predecessor is raised to the predecessor's value.  The array is
 * modified in place.
 *
 * Data   array to adjust (modified in place)
 * Count  number of elements in Data; values <= 1 leave Data untouched
 *
 * Returns the number of elements that were raised.  (The function is
 * declared int, so it must return a defined value on every path.)
 */
static int Monotone( int Data[], int Count )
{
  int i;
  int raised = 0;

  for (i=1;i<Count;i++)
  {
    if (Data[i] < Data[i-1])
    {
      Data[i] = Data[i-1];  /* pull the dip up to the running maximum */
      raised++;
    }
  }

  return raised;
}


/* Hist -- build a compressed histogram of the values in Data.
 *
 * Data   input values (read only); must not overlap HA or HD
 * Count  number of elements in Data
 * HA     output: occurrence counts, one per distinct value found
 * HD     output: the distinct values, in ascending order
 * Range  number of elements available in HA and in HD; only values v
 *        with 0 <= v < Range are counted
 *
 * On return, HA[0..j-1] / HD[0..j-1] hold the (count, value) pairs and
 * every remaining element of both arrays (up to Range) is zero.
 * Returns j, the number of distinct values found.  Returns 0 without
 * touching HA or HD when Range <= 0.
 *
 * Values outside [0, Range) are skipped rather than used as an index.
 * Counting is done directly in HA and then compacted in place, so no
 * Range-sized temporary is placed on the stack.
 */
static int Hist( int Data[], int Count, int HA[], int HD[], int Range )
{
  int i, j, occurrences;

  if (Range <= 0)
    return 0;

  /* Initialize */
  for (i=0;i<Range;i++)
  {
    HA[i] = 0;  /* Compressed histogram array (return value) */
    HD[i] = 0;  /* Compressed histogram data array (return value) */
  }

  /* Compute histogram: HA[v] temporarily holds the count for value v */
  for (i=0;i<Count;i++)
  {
    if (Data[i] >= 0 && Data[i] < Range)
      HA[Data[i]]++;
  }

  /* Compress histogram in place.  j never exceeds i, so writing slot j */
  /* cannot clobber a count that has not been visited yet.              */
  j = 0;
  for (i=0;i<Range;i++)
  {
    if (HA[i] > 0)
    {
      occurrences = HA[i];
      HA[i] = 0;            /* clear first: slot i may be the same as slot j */
      HA[j] = occurrences;
      HD[j] = i;
      j++;
    }
  }

  return j;
}

/* Smooth -- merge neighbouring histogram bins whose values are close.
 *
 * HA       occurrence counts (modified in place)
 * HD       bin values matching HA (modified in place); AnalyzeTLB
 *          passes the output of Hist, which is in ascending order
 * NPoints  number of valid bins in HA / HD
 *
 * For each adjacent pair (i, i+1): if the distance between HD[i] and
 * HD[i+1] (as computed by Abs, declared outside this file section) is
 * no more than round(0.1 * HD[i]), the count of bin i is added to bin
 * i+1 and bin i is emptied.  Because the pass runs left to right, a
 * run of close bins accumulates into the last bin of the run.
 *
 * Emptied bins are then squeezed out, the tail of both arrays (up to
 * NPoints) is zero-filled, and the number of surviving bins is
 * returned.
 */
static int Smooth( int HA[], int HD[], int NPoints )
{
  int i, j, tolerance;

  /* Smooth the data */
  for (i=0;i<NPoints-1;i++)
  {
    tolerance = (int) round(0.1 * (float) HD[i]);
    if (Abs(HD[i+1]- HD[i]) <= tolerance)
    {
      HA[i+1]   = HA[i] + HA[i+1];  /* pull lower value up since it is */
      HA[i]     = 0;                /* used as threshold for size test */
    }
  }

  /* Compact HA and HD in place: j trails i, so each surviving bin is */
  /* copied down over a slot that has already been examined.          */
  j = 0;
  for (i=0;i<NPoints;i++)
  {
    if (HA[i] != 0)
    {
      HA[j] = HA[i];
      HD[j] = HD[i];
      j++;
    }
  }

  /* Clear the slots that are no longer in use */
  for (i=j;i<NPoints;i++)
  {
    HA[i] = 0;
    HD[i] = 0;
  }
  return j;
}

/* Range -- return the largest value found in HD[0..NPoints-1].
 *
 * The running maximum starts at zero, so the result is never negative:
 * an empty array, or one holding only negative values, yields 0.
 */
static int Range( int HD[], int NPoints )
{
  int largest = 0;
  int pos = 0;

  while (pos < NPoints)
  {
    largest = (HD[pos] > largest) ? HD[pos] : largest;
    pos++;
  }

  return largest;
}


/* ChooseLatency -- find and record the most common latency in one level.
 *
 * Cycles      latency measurements
 * LB, UB      half-open index range [LB, UB) of Cycles to examine
 * Level       level number, used in the result file name
 * FilePrefix  leading part of the result file name
 * FileSuffix  number embedded in the result file name
 *
 * Histograms Cycles[LB..UB-1], picks the value with the highest count
 * (the smallest such value on a tie, since bins are scanned in
 * ascending order), reports it on stderr and hands it to WriteResult
 * under the name "<FilePrefix><Level, 2 digits>L-<FileSuffix>Spot".
 *
 * Terminates the program with exit(-1) if memory cannot be allocated
 * or if the result file name does not fit in the name buffer.
 */
static void ChooseLatency( int Cycles[], int LB, int UB, int Level, 
			   char *FilePrefix, int FileSuffix){
  int i, j, range, NPoints, maxct, maxval, len;
  int *Subset, *HD, *HA;

  char fname[FILENAME_MAX];

  NPoints = UB - LB;

  Subset = (int *) malloc((NPoints+2) * sizeof(int));

  if (Subset == NULL) 
  {
    fprintf(stderr,"malloc() fails. See log.\n");
    fprintf(LogFile,"\nIn ChooseLatency, malloc() returns NULL.\n");
    fprintf(LogFile,"Analysis cannot proceed.\n");
    exit(-1);
  }

  /* Copy the slice of interest so it can be indexed from zero */
  j = 0;
  for(i=LB;i<UB;i++)
    Subset[j++] = Cycles[i];

  /* Largest value in the slice determines the histogram size */
  range = Range(Subset, NPoints);

  HA     = (int *) malloc((range+2) * sizeof(int));
  HD     = (int *) malloc((range+2) * sizeof(int));

  if ((HA == NULL) || (HD == NULL))
  {
    fprintf(stderr,"malloc() fails. See log.\n");
    fprintf(LogFile,"\nIn ChooseLatency, malloc() returns NULL.\n");
    fprintf(LogFile,"Analysis cannot proceed.\n");
    exit(-1);
  }

  /* range+1 bins cover the values 0..range inclusive */
  j = Hist( Subset, NPoints, HA, HD, range+1);

  /* Pick the bin with the highest count */
  maxct  = 0; 
  maxval = 0;
  for (i=0;i<j;i++)
  {
    if (HA[i] > maxct)
    {
      maxct = HA[i];
      maxval = HD[i];
    }
  }
  fprintf(stderr,"Most common latency at this level is %d.\n",maxval);

  /* Build the result file name with a bounded write: FilePrefix comes */
  /* from the caller and its length is not limited here.               */
  len = snprintf(fname,sizeof fname,"%s%02dL-%dSpot",
		 FilePrefix,Level,FileSuffix);
  if ((len < 0) || ((size_t) len >= sizeof fname))
  {
    fprintf(stderr,"Result file name too long. See log.\n");
    fprintf(LogFile,"\nIn ChooseLatency, result file name does not fit in %d bytes.\n",
	    (int) sizeof fname);
    fprintf(LogFile,"Analysis cannot proceed.\n");
    exit(-1);
  }
  WriteResult(fname,maxval);

  free(Subset);
  free(HA);
  free(HD);
}

/* LogTLBHistogram -- write one compressed histogram to LogFile.
 *
 * Prints Title, one "value <tab> count" line per non-empty bin, a note
 * for any empty bin, and the total number of points covered next to
 * Count (the number of points that were measured).
 */
static void LogTLBHistogram( const char *Title, int HA[], int HD[],
			     int NBins, int Count )
{
  int i, total;

  fprintf(LogFile,"\n\n%s\nCycles\tN\n",Title);
  total = 0;
  for (i=0;i<NBins;i++)
  {
    if (HA[i] > 0)
    {
      fprintf(LogFile,"%4d\t%4d\n",HD[i],HA[i]);
      total += HA[i];
    }
    else 
      fprintf(LogFile,"CHA[%d] is zero.\n",i);
  }
  fprintf(LogFile,"Histogram captures %d points (of %d total).\n",
	  total,Count);
}

/* AnalyzeTLB -- locate level boundaries in a set of latency measurements.
 *
 * Sizes       test sizes, in units of UnitSize; parallel to Cycles
 * Cycles      measured latency for each size (made monotone in place)
 * Count       number of entries in Sizes and Cycles
 * FilePrefix  leading part of every result file name
 * SmoothFlag  nonzero => merge histogram bins within 10% of each other
 * FileSuffix  number embedded in every result file name
 *
 * The latencies are forced monotone nondecreasing, histogrammed, and
 * optionally smoothed.  Every histogram bin with more than three
 * points is treated as a level: Cycles is scanned for the first entry
 * that exceeds the bin's value, the preceding size is recorded with
 * WriteResult as the level boundary, and ChooseLatency records the
 * most common latency inside the level.  Progress goes to stderr and
 * details go to LogFile.
 *
 * Terminates the program with exit(-1) if memory cannot be allocated
 * or if a result file name does not fit in the name buffer.
 */
void AnalyzeTLB( int Sizes[], int Cycles[], int Count, char *FilePrefix, 
		 int SmoothFlag, int FileSuffix )
{
  int i, j, k, UB, len;
  int start, end, level;
  int *CHA, *CHD;

  char fname[FILENAME_MAX];

  /* First, apply domain knowledge and make Cycles monotone nondecreasing */
  Monotone( Cycles, Count );
  fprintf(LogFile,"\nMonotone data:\n");  /* for debugging & graphs */
  /* NOTE(review): PrintNum is called twice in one argument list; if it */
  /* returns a single shared buffer both columns would show the same    */
  /* text -- confirm against its definition.                            */
  for (i=0;i<Count;i++)
    fprintf(LogFile,"%s\t%s\n",PrintNum(Sizes[i]*UnitSize),PrintNum(Cycles[i]));

  /* Next, find the dynamic range of Cycles */
  UB = Range( Cycles, Count );
  fprintf(LogFile,"\nDynamic range is 0 to %d cycles.\n",UB);

  /* Next, use an analog of bucket sort to create a histogram */
  UB ++ ;    /* Overallocate to simplify the Hist routine */

  CHA = (int *) malloc((UB+2) * sizeof(int));
  CHD = (int *) malloc((UB+2) * sizeof(int));

  if ((CHA == NULL) || (CHD == NULL))
  {
    fprintf(stderr,"malloc() fails. See log.\n");
    fprintf(LogFile,"\nIn AnalyzeTLB, malloc() returns NULL.\n");
    fprintf(LogFile,"Analysis cannot proceed.\n");
    exit(-1);
  }

  /* Step 1 -- compute a histogram of Cycles -> CHA and CHD */
  k = Hist( Cycles, Count, CHA, CHD, UB+1 );

  LogTLBHistogram("Histogram of Latencies",CHA,CHD,k,Count);

  /* Step 2 - for the size test, smooth the data, pulling values */
  /* upward.  The results of this smoothing are only used in the */
  /* cache size test.  (Latency will come from unsmoothed data.) */
  /* */
  /* Smoothing combines adjacent points in the histogram that    */
  /* differ by 10% or less.                                      */
  /* */
  if (SmoothFlag)
  {
    k =  Smooth( CHA, CHD, k );

    LogTLBHistogram("Smoothed Histogram of Latencies",CHA,CHD,k,Count);
  }

  /* At this point, CHD contains the possible thresholds for levels */
  /* in the cache.  The points are in ascending order. A point must */
  /* have four occurrences to span a power of two range in array    */
  /* size -- as we assume that a real cache will.  (We don't expect */
  /* a 32 KB L1 and a 48 KB L2 -- maybe 32 KB and 64 KB.)           */

  
  start = 1;      /* scan from index 1 so that j-1 below is always valid */
  end   = Count;  /* last index + 1 in the Cycles[] array */
  level = 1;
  for (i=0; i<k; i++)  /* i is index into compressed, smoothed histogram */
  {
    if (CHA[i] > 3)    /* => CHD[i] is a level */
    {
      fprintf(stderr,"\nSearching for level %d, starting at %s b.\n",
	      level,PrintNum(Sizes[start]*UnitSize));
      for (j=start;j<end;j++)
      {
        if (CHD[i] < Cycles[j])  /* first point slower than this level */
	{
	  fprintf(stderr,"\nFound level boundary at %s b.\n",
		  PrintNum(Sizes[j-1]*UnitSize));
	  fprintf(stderr,"-> transition from %d cycles to %d cycles.\n",
		  Cycles[j-1],Cycles[j]);

	  /* Bounded write: FilePrefix comes from the caller and its */
	  /* length is not limited here.                             */
	  len = snprintf(fname,sizeof fname,"%s%02dS-%dSpot",
			 FilePrefix,level,FileSuffix);
	  if ((len < 0) || ((size_t) len >= sizeof fname))
	  {
	    fprintf(stderr,"Result file name too long. See log.\n");
	    fprintf(LogFile,"\nIn AnalyzeTLB, result file name does not fit in %d bytes.\n",
		    (int) sizeof fname);
	    fprintf(LogFile,"Analysis cannot proceed.\n");
	    exit(-1);
	  }
	  WriteResult(fname,Sizes[j-1]*UnitSize);
	    
	  /* find & record most common latency */	    
	  ChooseLatency(Cycles,start,j,level,FilePrefix,FileSuffix);  

	  start = j;   /* the next level is searched from the boundary on */
	  level++;
	  break;
	}
	else
	  fprintf(stderr,"*");
      }
    }
  }
  fprintf(stderr,"\n");
  free (CHA);
  free (CHD);
}
