/* Copyright 2011, Rice University.  All rights reserved.
   No warranty of usability express or implied.  Have a lovely day! */
/*******************************************************************************
 * $HeadURL: svn+ssh://orion.cs.rice.edu/home/jasandov/phd/svn/code/characterization/cache_test/dcache/dcache.c $
 * $Revision: 1922 $
 * $Author: jasandov $
 * $Date: 2010-05-03 11:06:55 -0500 (Mon, 03 May 2010) $
 ******************************************************************************/
#include "dcache.h"
#include "PACE_timer.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <unistd.h>
#include <ctype.h>

/*******************************************************************************
 * Local declarations
 ******************************************************************************/

/* Nonzero enables diagnostic dumps to stderr from the shuffle, link and
   check routines in this file. */
static const unsigned int debug = 0;
/* Nonzero makes link_indices_rel / link_indices_abs call the matching
   check_linked_indices_* routine on the list they have just built. */
static const unsigned int check_results = 0;
/* Nonzero makes the link routines zero the whole data array before
   linking; the entry-count test inside check_linked_indices_* only runs
   when this is set. */
static const unsigned int clear_mem = 0;


/* Validates a relative-offset list (as built by link_indices_rel) and
   calls abort() on failure; defined further down in this file. */
static void check_linked_indices_rel(const unsigned int data_count,
                                     const int data[],
                                     const unsigned int indices_count,
                                     const unsigned int indices [],
                                     const unsigned int has_header);


/* Validates an absolute-pointer list (as built by link_indices_abs) and
   calls abort() on failure; defined further down in this file. */
static void check_linked_indices_abs(const unsigned int data_count,
                                     const void * const data[],
                                     const unsigned int indices_count,
                                     const unsigned int indices [],
                                     const unsigned int has_header);

/*******************************************************************************
 * Function Definitions
 ******************************************************************************/



/*******************************************************************************
 * Rate limiter for debug messages.  The function remembers the
 * PACE_clock() reading of the last call that was let through.  It
 * returns 1 (and records the new reading) when the current reading is
 * more than `delay` past the remembered one, and 0 otherwise.  The
 * remembered reading lives in a function-local static.
 ******************************************************************************/
int filtered_print(double delay) {
  static double previous_stamp = 0.0;
  const double now = PACE_clock();

  /* Still inside the quiet period: tell the caller to stay silent. */
  if (!(now > (previous_stamp + delay))) {
    return 0;
  }

  previous_stamp = now;
  return 1;
}

/*******************************************************************************
 * Parses a non-negative decimal number ("12", "1.5", ...) followed by
 * an optional binary multiplier suffix: k/K (x 2^10), m/M (x 2^20) or
 * g/G (x 2^30).  Nothing may follow the suffix.  On success the value,
 * truncated to whole base units, is stored in *result and 1 is
 * returned.
 *
 * Returns 0 (leaving *result untouched) when:
 *   - str or result is a null pointer,
 *   - the text is not of the form digits[.digits][suffix],
 *   - the value does not fit in an unsigned int, or the digit string is
 *     so long that the intermediate arithmetic would overflow.
 *
 * The arithmetic is carried out in unsigned long long so that inputs
 * such as "3.5G" are computed exactly and inputs such as "4G" are
 * rejected instead of silently wrapping.
 ******************************************************************************/
unsigned int parse_size_string(const char *str, unsigned int *result) {
  const unsigned long long ull_max = ~0ULL;
  const unsigned int uint_max = ~0U;
  unsigned long long acc = 0;        /* all digits read so far, as an integer */
  unsigned long long divisor = 1;    /* 10^(number of fractional digits) */
  unsigned long long multiplier = 1; /* value of the optional suffix */
  unsigned long long value = 0;
  unsigned int i = 0;

  if (str == 0 || result == 0) return 0;

  /* Check for at least one digit.  The <ctype.h> functions require an
     argument representable as unsigned char, hence the casts. */
  if (!isdigit((unsigned char)str[i])) return 0;

  /* Integer part */
  while (isdigit((unsigned char)str[i])) {
    const unsigned int digit = (unsigned int)(str[i] - '0');
    if (acc > (ull_max - digit) / 10) return 0;
    acc = acc * 10 + digit;
    i++;
  }

  /* Optional fractional part: at least one digit must follow the '.' */
  if (str[i] == '.') {
    i++;
    if (!isdigit((unsigned char)str[i])) return 0;
    while (isdigit((unsigned char)str[i])) {
      const unsigned int digit = (unsigned int)(str[i] - '0');
      if (acc > (ull_max - digit) / 10) return 0;
      if (divisor > ull_max / 10) return 0;
      acc = acc * 10 + digit;
      divisor = divisor * 10;
      i++;
    }
  }

  /* Optional multiplier suffix */
  switch (str[i]) {
    case 'k':
    case 'K':
      multiplier = 1024ULL;
      i++;
      break;
    case 'm':
    case 'M':
      multiplier = 1024ULL * 1024ULL;
      i++;
      break;
    case 'g':
    case 'G':
      multiplier = 1024ULL * 1024ULL * 1024ULL;
      i++;
      break;
    default:
      break;
  }

  /* Trailing garbage is an error */
  if (str[i] != '\0') return 0;

  /* Scale first, then drop the fractional digits, checking each step */
  if (acc > ull_max / multiplier) return 0;
  value = (acc * multiplier) / divisor;
  if (value > uint_max) return 0;

  *result = (unsigned int)value;
  return 1;
}


/*******************************************************************************
 * Writes a gnuplot "set xtics (...)" statement to `out`.  Tic
 * positions are powers of two: the exponent range runs from
 * floor(log2(low_in)) to ceil(log2(high_in)).  A first pass emits
 * level-0 (major) tics every `major_step` exponents, a second pass
 * emits level-1 (minor) tics every `minor_step` exponents.  Labels are
 * produced by pp_size().
 ******************************************************************************/
void set_pretty_log_xtics(FILE *out, const unsigned int low_in, const unsigned int high_in) {
  const unsigned int bins = 12;
  const unsigned int high = (unsigned int)(ceil(log(high_in) / log(2.0)));
  const unsigned int low = (unsigned int)(floor(log(low_in) / log(2.0)));
  const unsigned int exponent_span = high - low + 1;
  /* Round the span up to a multiple of 2*bins, yielding an even step */
  const unsigned int major_step = ((exponent_span + (bins * 2 - 1)) / (bins * 2)) * 2;
  const unsigned int minor_step = (major_step + 3) / 4;
  unsigned int exponent;

  assert(major_step > 0);
  assert(minor_step > 0);

  fprintf(out, "set xtics (");

  /* Major tics; a separator precedes every entry except the first */
  exponent = low;
  while (exponent <= high) {
    const unsigned int tic = (unsigned int)pow(2.0, exponent);
    if (exponent > low) fprintf(out, ", ");
    fprintf(out, "'%s' %u 0", pp_size(tic), tic);
    exponent += major_step;
  }

  /* Minor tics; these always follow a major tic, so always separate */
  exponent = low;
  while (exponent <= high) {
    const unsigned int tic = (unsigned int)pow(2.0, exponent);
    fprintf(out, ", ");
    fprintf(out, "'%s' %u 1", pp_size(tic), tic);
    exponent += minor_step;
  }

  fprintf(out, ")\n");
}

/*******************************************************************************
 * Writes a gnuplot "set xtics (...)" statement to `out` with evenly
 * spaced tics.  The spacing is twice a power of two derived from
 * (high_in - low_in) / (2 * bins), with a floor of 1.  The first tic
 * is low_in rounded down to a multiple of the spacing and the last is
 * high_in rounded up to one.  Labels are produced by pp_size().
 ******************************************************************************/
void set_pretty_lin_xtics(FILE *out, const unsigned int low_in, const unsigned int high_in) {
  const unsigned int bins = 12;
  const unsigned int range_per_bin = (high_in - low_in) / (bins * 2);
  const double exponent = ceil(log(range_per_bin) / log(2.0));
  unsigned int step = (unsigned int)pow(2, exponent) * 2;
  unsigned int first_tic;
  unsigned int last_tic;
  unsigned int tic;

  if (step < 1) step = 1;

  first_tic = (unsigned int)(step * floor((double)low_in / (double)step));
  last_tic = (unsigned int)(step * ceil((double)high_in / (double)step));

  fprintf(out, "set xtics (");
  tic = first_tic;
  while (tic <= last_tic) {
    /* Separator before every entry except the first */
    if (tic > first_tic) fprintf(out, ", ");
    fprintf(out, "'%s' %u", pp_size(tic), tic);
    tic += step;
  }
  fprintf(out, ")\n");
}


/*******************************************************************************
 * Rounds the double precision argument x to the specified number of
 * decimal places and retuns the value.
 ******************************************************************************/
double round_decimal_places(const double x, const unsigned int dp) {
  double ret = x * pow(10, dp);
  ret = floor(ret + 0.5);
  ret = ret / pow(10, dp);
  return ret;
}

/*******************************************************************************
 * Formats a byte count with a B / KB / MB / GB suffix (1024-based),
 * rounded to one decimal place, and returns a pointer to the text.
 *
 * The text lives in a function-local static ring of 16 buffers of 64
 * bytes each; every call takes the next buffer, wrapping around after
 * the 16th.  This allows up to 16 results to be alive at once (e.g.
 * several pp_size() calls as arguments of one printf), after which the
 * oldest is overwritten.  The returned pointer must not be kept
 * indefinitely.  This function is NOT thread safe.
 ******************************************************************************/
const char * pp_size(const double size) {
  static char ring[16][64];
  static unsigned int next_slot = 0;
  const unsigned int dp = 1;
  char *text = ring[next_slot];

  /* Advance the ring for the next caller */
  next_slot = (next_slot + 1) % 16;

  if (size >= 1024*1024*1024) {
    sprintf(text, "%gGB", round_decimal_places((double)size / 1024.0 / 1024.0 / 1024.0, dp));
    return text;
  }
  if (size >= 1024*1024) {
    sprintf(text, "%gMB", round_decimal_places((double)size / 1024.0 / 1024.0, dp));
    return text;
  }
  if (size >= 1024) {
    sprintf(text, "%gKB", round_decimal_places((double)size / 1024.0, dp));
    return text;
  }
  sprintf(text, "%gB", round_decimal_places((double)size, dp));
  return text;
}

/*******************************************************************************
 * Allocates a d1 x d2 array as d1 separately malloc'ed rows of
 * d2 * element_size bytes each, plus a table of d1 row pointers.
 *
 * Returns the row-pointer table, or 0 if any allocation fails or if a
 * size computation would overflow size_t.  On failure nothing is
 * leaked: every row allocated so far and the table itself are freed
 * before returning.  The contents of the rows are uninitialized.
 * Release the result with free_2D_array().
 ******************************************************************************/
void **malloc_2D_array(unsigned int d1, unsigned int d2, unsigned int element_size) {
  const size_t size_max = (size_t)-1;
  size_t row_bytes = 0;
  void **ptrs = 0;
  unsigned int i = 0;

  /* Compute the sizes in size_t and refuse anything that would wrap;
     a wrapped product would silently allocate rows that are too small. */
  if (d2 != 0 && (size_t)element_size > size_max / (size_t)d2) return 0;
  if ((size_t)d1 > size_max / sizeof *ptrs) return 0;
  row_bytes = (size_t)element_size * (size_t)d2;

  ptrs = malloc((size_t)d1 * sizeof *ptrs);
  if (ptrs == 0) return 0;

  for (i = 0 ; i < d1 ; i++) {
    ptrs[i] = malloc(row_bytes);
    if (ptrs[i] == 0) {
      /* Roll back the rows allocated before the failure */
      while (i > 0) {
        i--;
        free(ptrs[i]);
      }
      free(ptrs);
      return 0;
    }
  }
  return ptrs;
}

/*******************************************************************************
 * Releases an array obtained from malloc_2D_array(): frees each of the
 * d1 rows and then the row-pointer table.  A null `array` is accepted
 * and ignored, so the (possibly failed) result of malloc_2D_array()
 * can be passed straight in.  d2 is not needed to free the memory and
 * is accepted only for symmetry with malloc_2D_array().
 ******************************************************************************/
void free_2D_array(void **array, unsigned int d1, unsigned int d2) {
  unsigned int i = 0;

  (void)d2;
  if (array == 0) return;

  for (i = 0 ; i < d1 ; i++) {
    free(array[i]);
  }
  free(array);
}


/* Walks a relative-offset linked list for n steps and returns the
   difference between the PACE_stop_clock() and PACE_start_clock()
   readings taken around the walk (units are whatever those helpers
   report -- not visible from this file).

   List layout as used here: data[0] holds the offset of the first node
   from the start of the array, and every node holds the signed offset
   (in elements) from itself to the next node. */
double traverse_list_rel(const unsigned int n, const int data []) {
  unsigned int i;
  
  const int *ptr = data + data[0];
  double clock_start = 0.0;
  double clock_stop = 0.0;

  /* Run and time the test.  ptr is re-derived from data[0] immediately
     before the clock starts (it already holds the same value from its
     initializer). */
  ptr = data + data[0];
  clock_start = PACE_start_clock();
  for (i = n ; i > 0 ; i--) {
    /* Each step depends on the value loaded by the previous one */
    ptr = ptr + (*ptr);
  }
  clock_stop = PACE_stop_clock();

  {
    /* Store the final node's value into a volatile object; presumably
       this keeps the compiler from discarding the pointer chase as
       dead code. */
    volatile int sink;
    sink = (*ptr);
  }
  
  return clock_stop - clock_start;
}

/* Finds a step count for which traverse_list_rel() reports a duration of
   at least `threshold`.  Starting from min_n (or 1 when min_n is 0), the
   count is multiplied by 1.5 every time a run comes in under the
   threshold.  The search ends after a streak of runs that all reach the
   threshold: 10 runs if the count never had to grow, otherwise the 9 runs
   following the last growth.  Returns the final count truncated to
   unsigned int. */
unsigned int traverse_list_rel_until_threshold(const unsigned int min_n, const int data [], const double threshold) {
  const unsigned int max_trials = 10;
  double steps = 1.0;
  double elapsed = 0.0;
  unsigned int remaining = max_trials;

  if (min_n > 0) {
    steps = (double)min_n;
  }

  while (remaining > 0) {
    elapsed = traverse_list_rel((unsigned int)steps, data);
    if (elapsed < threshold) {
      /* Too short to measure: grow the count and restart the streak */
      steps = steps * 1.5;
      remaining = max_trials;
    }
    remaining--;
  }

  return (unsigned int)steps;
}

/* Runs traverse_list_rel_until_threshold() on a one-element list whose
   only entry is offset 0, i.e. a node that links to itself, and returns
   the step count it settles on. */
unsigned int traverse_empty_list_rel_until_threshold(const unsigned int min_n, const double threshold) {
  const int self_loop[1] = { 0 };
  const unsigned int steps = traverse_list_rel_until_threshold(min_n, self_loop, threshold);
  return steps;
}


/* Walks an absolute-pointer linked list for n steps and returns the
   difference between the PACE_stop_clock() and PACE_start_clock()
   readings taken around the walk (units are whatever those helpers
   report -- not visible from this file).

   List layout as used here: data[0] holds the address of the first
   node, and every node holds the address of the next node. */
double traverse_list_abs(const unsigned int n, const void * const data []) {
  unsigned int i;
  
  const void * const * ptr = data[0];
  double clock_start = 0.0;
  double clock_stop = 0.0;

  /* Run and time the test.  ptr is re-loaded from data[0] immediately
     before the clock starts (it already holds the same value from its
     initializer). */
  ptr = data[0];
  clock_start = PACE_start_clock();
  for (i = n ; i > 0 ; i--) {
    /* Each step depends on the pointer loaded by the previous one */
    ptr = (*ptr);
  }
  clock_stop = PACE_stop_clock();

  {
    /* Store the final pointer into a volatile object; presumably this
       keeps the compiler from discarding the pointer chase as dead
       code. */
    const void * const * volatile sink;
    sink = ptr;
  }

  return clock_stop - clock_start;
}

/* Writes the addresses of the first n nodes of an absolute-pointer list
   to csv_out, one "%p" value per line.  The walk starts at the node
   stored in data[0] and follows each node's stored pointer to reach the
   next one.  Nothing is timed here. */
void dump_list_abs(FILE *csv_out, const unsigned int n, const void * const data []) {
  const void * const * node = data[0];
  unsigned int remaining = n;

  while (remaining > 0) {
    fprintf(csv_out, "%p\n", (void*)node);
    node = (*node);
    remaining--;
  }
}


/* Finds a step count for which traverse_list_abs() reports a duration of
   at least `threshold`.  Starting from min_n (or 1 when min_n is 0), the
   count is multiplied by 1.5 every time a run comes in under the
   threshold.  The search ends after a streak of runs that all reach the
   threshold: 10 runs if the count never had to grow, otherwise the 9 runs
   following the last growth.  Returns the final count truncated to
   unsigned int. */
unsigned int traverse_list_abs_until_threshold(const unsigned int min_n, const void * const data [], const double threshold) {
  const unsigned int max_trials = 10;
  double steps = 1.0;
  double elapsed = 0.0;
  unsigned int remaining = max_trials;

  if (min_n > 0) {
    steps = (double)min_n;
  }

  while (remaining > 0) {
    elapsed = traverse_list_abs((unsigned int)steps, data);
    if (elapsed < threshold) {
      /* Too short to measure: grow the count and restart the streak */
      steps = steps * 1.5;
      remaining = max_trials;
    }
    remaining--;
  }

  return (unsigned int)steps;
}

/* Runs traverse_list_abs_until_threshold() on a one-element list whose
   only entry holds its own address, i.e. a node that links to itself,
   and returns the step count it settles on. */
unsigned int traverse_empty_list_abs_until_threshold(const unsigned int min_n, const double threshold) {
  const void * self_loop[1];
  unsigned int steps;

  self_loop[0] = &self_loop;
  steps = traverse_list_abs_until_threshold(min_n, self_loop, threshold);
  return steps;
}


/* Repeats traverse_list_abs(n, data) and returns the SHORTEST duration
   observed (not the mean).

   Sampling policy: at least 5 and at most 50 runs are taken.  After the
   first 5, sampling stops as soon as the sample standard deviation
   divided by the mean is no longer above 0.05.  Mean and sum of squared
   deviations are maintained incrementally as each sample arrives.

   NOTE: the `confidence` argument is not consulted; the 0.05 limit is
   fixed in the code. */
double traverse_list_abs_with_confidence(const unsigned int n,
                                         const void * const data [],
                                         const double confidence)
{
  const unsigned int init_samples = 5;
  const unsigned int max_samples = 50;
  double mean = traverse_list_abs(n, data);
  double best = mean;
  double sse = 0.0;
  unsigned int count = 1;

  (void)confidence;

  for (;;) {
    double duration;
    double delta;
    double new_mean;

    if (count >= max_samples) break;
    /* Written as !(x > limit) rather than (x <= limit) so that a NaN
       ratio also ends the sampling. */
    if (count >= init_samples &&
        !(sqrt(sse/(double)(count-1))/mean > 0.05)) break;

    duration = traverse_list_abs(n, data);
    count += 1;

    /* Incremental update of the running mean and squared-error sum */
    delta = duration - mean;
    new_mean = mean + delta/count;
    sse = sse + delta * (duration - new_mean);
    mean = new_mean;

    if (duration < best) best = duration;

    /* Disabled diagnostic output */
    if (0) {
      double var = sse/(double)(count-1);
      double sd = sqrt(var);
      fprintf(stderr, "count=%u mean=%g sd=%g %%=%g\n", count, mean, sd, sd/mean);
    }
  }

  return best;
}

/* Randomly reorders indices[0 .. indices_count-1] in place.  For every
   position in turn, a position is drawn from the whole array with
   rand() and the two entries are exchanged.  When the file-level `debug`
   flag is set, the array is dumped to stderr before and after. */
void shuffle_indices(const unsigned int indices_count,
                     unsigned int indices [])
{
  unsigned int pos;

  if (debug) {
    fprintf(stderr, "START dump before shuffle_indices\n");
    pos = 0;
    while (pos < indices_count) {
      fprintf(stderr, "indices[%u] = %u\n", pos, indices[pos]);
      pos++;
    }
  }

  /* randomize the order of accesses */
  for (pos = 0 ; pos < indices_count ; pos++) {
    /* unit lies in [0, 1), so the scaled value stays below indices_count */
    const double unit = rand() / (RAND_MAX + 1.0);
    const unsigned int other = (int) ((double)(indices_count) * unit);
    const unsigned int held = indices[other];
    indices[other] = indices[pos];
    indices[pos] = held;
  }

  if (debug) {
    fprintf(stderr, "START dump after shuffle_indices\n");
    pos = 0;
    while (pos < indices_count) {
      fprintf(stderr, "indices[%u] = %u\n", pos, indices[pos]);
      pos++;
    }
  }
}

/* Subtracts the smallest entry of indices[0 .. indices_count-1] from
   every entry, so that the smallest entry becomes 0.  An empty array is
   left untouched. */
void shift_indices_to_zero(const unsigned int indices_count,
                           unsigned int indices [])
{
  unsigned int smallest;
  unsigned int k;

  if (indices_count == 0) return;

  /* Pass 1: locate the minimum */
  smallest = indices[0];
  for (k = 1 ; k < indices_count ; k++) {
    if (indices[k] < smallest) smallest = indices[k];
  }

  /* Pass 2: rebase every entry on it */
  for (k = 0 ; k < indices_count ; k++) {
    indices[k] -= smallest;
  }
}

/* Builds a circular relative-offset linked list inside data[].

   indices[] gives the visiting order: for every i, data[indices[i]] is
   set to the signed distance (in elements) from indices[i] to
   indices[(i+1) % indices_count].  data[0] serves as the list head: if
   it is still 0 after linking, it is set to indices[0] ("header" case);
   otherwise slot 0 is itself a list node and already leads into the
   cycle.

   Parameters:
     data_count    - number of elements in data[]
     data          - array that receives the list
     indices_count - number of entries in indices[]
     indices       - visiting order; every entry must be < data_count

   Error handling: all indices are validated BEFORE data[] is touched.
   If data_count is 0 or any index is out of range, the problem is
   reported on stderr and the function returns with data[] unmodified
   (no out-of-bounds store is ever made).  With indices_count == 0 the
   result is data[0] = 0, i.e. a head that links to itself.

   The file-level flags `debug`, `clear_mem` and `check_results` enable
   stderr dumps, zeroing of data[] before linking, and a call to
   check_linked_indices_rel() afterwards, respectively. */
void link_indices_rel(const unsigned int data_count,
                      int data[],
                      const unsigned int indices_count,
                      const unsigned int indices [])
{
  unsigned int has_header = 0;
  unsigned int bad_indices = 0;
  unsigned int i;

  if (debug) {
    fprintf(stderr, "START dump before link_indices_rel\n");
    for (i = 0 ; i < indices_count ; i++) {
      fprintf(stderr, "indices[%u] = %u\n", i, indices[i]);
    }
  }

  /* Validate everything up front so a bad input cannot corrupt memory */
  if (data_count == 0) {
    fprintf(stderr, "ERROR in link_indices_rel: empty data array\n");
    return;
  }
  for (i = 0 ; i < indices_count ; i++) {
    if (indices[i] >= data_count) {
      fprintf(stderr, "ERROR in link_indices_rel: index=%u out of range [%u, %u]\n", indices[i], 0u, data_count);
      bad_indices++;
    }
  }
  if (bad_indices > 0) return;

  /* Clear the data array */
  if (clear_mem) {
    memset((void*)data, (int)0, (size_t)data_count * sizeof(data[0]));
  }
  data[0] = 0;

  /* Nothing to link: data[0] == 0 already is a self-linked head */
  if (indices_count == 0) return;

  /* Generate the data array based on the order of the indices */
  for (i = 0 ; i < indices_count ; i++) {
    data[indices[i]] = indices[(i+1) % indices_count] - indices[i];
  }

  if (debug) {
    fprintf(stderr, "START dump after link_indices_rel\n");
    for (i = 0 ; i < indices_count ; i++) {
      fprintf(stderr, "data[%u] = %d --> next_index = %d\n", indices[i], data[indices[i]], (int)(indices[i] + data[indices[i]]));
    }
  }

  /* Slot 0 was not given a non-zero link: use it as the list head */
  if (data[0] == 0) {
    data[0] = indices[0];
    has_header = 1;
  }

  /* Double check the result */
  if (check_results) check_linked_indices_rel(data_count, data, indices_count, indices, has_header);
}

/* Sanity-checks a relative-offset list built by link_indices_rel() and
   calls abort() (after a message on stderr) if it is malformed.

   Two checks are made:
     1. Only when the file-level `clear_mem` flag is set: the number of
        non-zero entries in data[] must equal indices_count (plus one
        when has_header is set).
     2. Always: starting from the node named by data[0], following the
        offsets must stay inside data[] and return to the starting node
        after exactly indices_count steps.

   The starting index is range-checked before it is first used, and the
   walk gives up once it has taken more than indices_count steps, so a
   malformed list that never returns to its start is reported instead of
   looping forever. */
static void check_linked_indices_rel(const unsigned int data_count,
                                     const int data[],
                                     const unsigned int indices_count,
                                     const unsigned int indices [],
                                     const unsigned int has_header)
{
  (void)indices;

  /* data[0] is read below, so an empty array cannot be checked */
  if (data_count == 0) {
    fprintf(stderr, "check_linked_indices_rel: empty data array\n");
    abort();
  }

  /* Check the data array to make sure the number of entries is
     correct...only if we've cleared the mem first, though */
  if (clear_mem) {
    unsigned int i;
    unsigned int empty_count = 0;
    unsigned int node_count = 0;
    
    /* Check the data array */
    for (i = 0 ; i < data_count ; i++) {
      if (data[i] == 0)
        empty_count++;
      else
        node_count++;
    }
    if (empty_count + node_count != data_count) abort();
    if ((node_count != indices_count && !has_header) ||
        (node_count != indices_count + 1 && has_header) ||
        (node_count == 0 && indices_count != 1)) {
      fprintf(stderr, "Expecting %u data entries, but found %u\n", indices_count, node_count);
      abort();
    }
  }

  /* Check the data array cycle to make sure it's valid */
  {
    const unsigned int start_index = data[0];
    unsigned int index = start_index;
    int offset = 0;
    unsigned int count = 0;

    /* The head itself must point inside the array before we follow it */
    if (start_index >= data_count) {
      fprintf(stderr, "Index %u out of range [%u, %u]\n", start_index, 0u, data_count);
      abort();
    }

    do {
      if (debug) fprintf(stderr, "offset = %d\n", offset);
      offset = data[index];
      index = index + offset;
      count++;
      if (index >= data_count) {
        fprintf(stderr, "Index %u out of range [%u, %u]\n", index, 0u, data_count);
        abort();
      }
      /* Stop once the step budget is exceeded; the count test below
         then reports the mismatch. */
    } while (index != start_index && count <= indices_count);
    
    if(count != indices_count) {
      fprintf(stderr, "count != indices_count:  %u != %u\n", count, indices_count);
      abort();
    }
  }
}

/* Builds a circular absolute-pointer linked list inside data[].

   indices[] gives the visiting order: for every i, data[indices[i]] is
   set to the address of data[indices[(i+1) % indices_count]].  data[0]
   serves as the list head: if it is still null after linking, it is set
   to the address of data[indices[0]] ("header" case); otherwise slot 0
   is itself a list node and already leads into the cycle.

   Parameters:
     data_count    - number of elements in data[]
     data          - array that receives the list
     indices_count - number of entries in indices[]
     indices       - visiting order; every entry must be < data_count

   Error handling: all indices are validated BEFORE data[] is touched.
   If data_count is 0 or any index is out of range, the problem is
   reported on stderr and the function returns with data[] unmodified
   (no out-of-bounds store is ever made).  With indices_count == 0 the
   result is data[0] = &data[0], i.e. a head that links to itself.

   The file-level flags `debug`, `clear_mem` and `check_results` enable
   stderr dumps, zeroing of data[] before linking, and a call to
   check_linked_indices_abs() afterwards, respectively. */
void link_indices_abs(const unsigned int data_count,
                      const void * data[],
                      const unsigned int indices_count,
                      const unsigned int indices [])
{
  unsigned int i;
  unsigned int has_header = 0;
  unsigned int bad_indices = 0;

  if (debug) {
    fprintf(stderr, "START dump before link_indices_abs\n");
    for (i = 0 ; i < indices_count ; i++) {
      fprintf(stderr, "indices[%u] = %u\n", i, indices[i]);
    }
  }

  /* Validate everything up front so a bad input cannot corrupt memory */
  if (data_count == 0) {
    fprintf(stderr, "ERROR in link_indices_abs: empty data array\n");
    return;
  }
  for (i = 0 ; i < indices_count ; i++) {
    if (indices[i] >= data_count) {
      fprintf(stderr, "ERROR in link_indices_abs: index=%u out of range [%u, %u]\n", indices[i], 0u, data_count);
      bad_indices++;
    }
  }
  if (bad_indices > 0) return;

  /* Clear the data array */
  if (clear_mem) {
    memset((void*)data, (int)0, (size_t)data_count * sizeof(data[0]));
  }
  data[0] = 0;

  /* Nothing to link: make the head point at itself so that a traversal
     never follows a null pointer. */
  if (indices_count == 0) {
    data[0] = &(data[0]);
    return;
  }

  /* Generate the data array based on the order of the indices */
  for (i = 0 ; i < indices_count ; i++) {
    data[indices[i]] = &(data[indices[(i+1) % indices_count]]);
  }

  if (debug) {
    fprintf(stderr, "START dump after link_indices_abs\n");
    for (i = 0 ; i < indices_count ; i++) {
      fprintf(stderr, "data[%u] = %p --> next_ptr = %p\n", indices[i], (void*)&data[indices[i]], (void*)data[indices[i]]);
    }
  }

  /* Slot 0 was not used as a node: use it as the list head */
  if (data[0] == 0) {
    data[0] = &(data[indices[0]]);
    has_header = 1;
  }

  /* Double check the result */
  if (check_results) check_linked_indices_abs(data_count, data, indices_count, indices, has_header);
}

/* Sanity-checks an absolute-pointer list built by link_indices_abs() and
   calls abort() (after a message on stderr) if it is malformed.

   Two checks are made:
     1. Only when the file-level `clear_mem` flag is set: the number of
        non-null entries in data[] must equal indices_count (plus one
        when has_header is set).
     2. Always: starting from the node named by data[0], following the
        stored pointers must stay inside data[] and return to the
        starting node after exactly indices_count steps.

   The starting pointer is range-checked before it is first
   dereferenced, and the walk gives up once it has taken more than
   indices_count steps, so a malformed list that never returns to its
   start is reported instead of looping forever. */
static void check_linked_indices_abs(const unsigned int data_count,
                                     const void * const data[],
                                     const unsigned int indices_count,
                                     const unsigned int indices [],
                                     const unsigned int has_header)
{
  (void)indices;

  /* data[0] is read below, so an empty array cannot be checked */
  if (data_count == 0) {
    fprintf(stderr, "check_linked_indices_abs: empty data array\n");
    abort();
  }

  /* Check the data array to make sure the number of entries is
     correct...only if we've cleared the mem first, though */
  if (clear_mem) {
    unsigned int i;
    unsigned int empty_count = 0;
    unsigned int node_count = 0;

    /* Check the data array */
    for (i = 0 ; i < data_count ; i++) {
      if (data[i] == 0)
        empty_count++;
      else
        node_count++;
    }
    if (empty_count + node_count != data_count) abort();
    if ((node_count != indices_count && !has_header) ||
        (node_count != indices_count + 1 && has_header) ||
        (node_count == 0 && indices_count != 1)) {
      fprintf(stderr, "Expecting %u data entries, but found %u\n", indices_count, node_count);
      abort();
    }
  }

  /* Check the data array cycle to make sure it's valid */
  {
    const void * const * const start_ptr = data[0];
    const void * const * ptr = start_ptr;
    unsigned int count = 0;

    /* The head itself must point inside the array before we follow it */
    if (start_ptr < data || start_ptr >= data + data_count) {
      fprintf(stderr, "Pointer %p out of range [%p, %p]\n", (void*)start_ptr, (void*)data, (void*)&(data[data_count]));
      abort();
    }

    do {
      if (debug) fprintf(stderr, "ptr = %p\n", (void*)ptr);
      ptr = (*ptr);
      count++;
      if (ptr < data || ptr >= data + data_count) {
        fprintf(stderr, "Pointer %p out of range [%p, %p]\n", (void*)ptr, (void*)data, (void*)&(data[data_count]));
        abort();
      }
      /* Stop once the step budget is exceeded; the count test below
         then reports the mismatch. */
    } while (ptr != start_ptr && count <= indices_count);
    
    if(count != indices_count) {
      fprintf(stderr, "count != indices_count:  %u != %u\n", count, indices_count);
      abort();
    }
  }
}



/* Fills indices[0 .. indices_count-1] with the identity sequence
   0, 1, 2, ..., indices_count-1. */
void generate_sequential_indices(const unsigned int indices_count,
                                 unsigned int indices [])
{
  unsigned int value = 0;

  while (value < indices_count) {
    indices[value] = value;
    value++;
  }
}

/* Fills indices[0 .. indices_count-1] with 0, 1, ..., indices_count-1
   and then reorders them randomly with shuffle_indices(). */
void generate_sequential_randomized_indices(const unsigned int indices_count,
                                            unsigned int indices [])
{
  unsigned int value;

  /* Identity sequence first ... */
  for (value = 0 ; value < indices_count ; value++) {
    indices[value] = value;
  }

  /* ... then randomize its order */
  shuffle_indices(indices_count, indices);
}

/* Fills indices[] with every value in 0 .. indices_count-1 exactly once,
   ordered as a column-major walk over a grid that is 1024 columns wide:
   first all values congruent to 0 modulo 1024 in ascending order, then
   all values congruent to 1, and so on.  Consecutive entries therefore
   differ by 1024 except where the walk moves on to the next column. */
void generate_block_cyclic_indices(const unsigned int indices_count,
                                   unsigned int indices [])
{
  const unsigned int stride = 1024;
  const unsigned int passes = indices_count / stride + 1;
  unsigned int written = 0;
  unsigned int offset;
  unsigned int pass;

  for (offset = 0 ; offset < stride ; offset++) {
    /* candidate tracks pass*stride + offset without re-multiplying */
    unsigned int candidate = offset;
    for (pass = 0 ; pass < passes ; pass++) {
      /* The grid may be larger than the array: skip cells past the end */
      if (candidate < indices_count) {
        indices[written] = candidate;
        written++;
      }
      candidate += stride;
    }
  }
}

/* Fills indices[] with every value in 0 .. indices_count-1 exactly once,
   using a two-level strided order.  Each value is viewed as
       row * (64*64) + col * 64 + block
   and the values are emitted with `block` as the outermost loop, `col`
   in the middle and `row` innermost.  Combinations that fall at or
   beyond indices_count are skipped. */
void generate_double_block_cyclic_indices(const unsigned int indices_count,
                                          unsigned int indices [])
{
  const unsigned int blocks = 64;
  const unsigned int cols = 64;
  const unsigned int rows = indices_count / blocks / cols + 1;
  const unsigned int row_stride = cols * blocks;
  unsigned int written = 0;
  unsigned int block;
  unsigned int col;
  unsigned int row;

  for (block = 0 ; block < blocks ; block++) {
    for (col = 0 ; col < cols ; col++) {
      /* candidate tracks row*row_stride + col*blocks + block */
      unsigned int candidate = col * blocks + block;
      for (row = 0 ; row < rows ; row++) {
        if (candidate < indices_count) {
          indices[written] = candidate;
          written++;
        }
        candidate += row_stride;
      }
    }
  }
}

/* Fills indices[] with every value in 0 .. indices_count-1 exactly once,
   using a two-level strided order whose three loop dimensions are each
   visited in a random order.  Each value is viewed as
       row * (32*32) + col * 32 + block
   Three permutations (of the rows, the columns and the blocks) are
   produced with shuffle_indices(), in that order, and the values are
   emitted with the permuted `block` outermost, the permuted `col` in the
   middle and the permuted `row` innermost.  Combinations that fall at or
   beyond indices_count are skipped.

   The three temporary permutation arrays are heap-allocated and freed
   before returning; allocation failure is caught only by assert(). */
void generate_randomized_double_block_cyclic_indices(const unsigned int indices_count,
                                                     unsigned int indices [])
{
  const unsigned int blocks = 32;
  const unsigned int cols = 32;
  const unsigned int rows = indices_count / (blocks * cols) + 1;
  const unsigned int row_stride = cols * blocks;

  unsigned int *row_order = malloc(sizeof(unsigned int) * rows);
  unsigned int *col_order = malloc(sizeof(unsigned int) * cols);
  unsigned int *block_order = malloc(sizeof(unsigned int) * blocks);

  unsigned int written = 0;
  unsigned int b;
  unsigned int c;
  unsigned int r;

  assert(row_order);
  assert(col_order);
  assert(block_order);

  /* Start each dimension as the identity permutation */
  r = 0;
  while (r < rows) { row_order[r] = r; r++; }
  c = 0;
  while (c < cols) { col_order[c] = c; c++; }
  b = 0;
  while (b < blocks) { block_order[b] = b; b++; }

  /* Shuffle order is rows, cols, blocks: it fixes which rand() values
     each permutation consumes. */
  shuffle_indices(rows, row_order);
  shuffle_indices(cols, col_order);
  shuffle_indices(blocks, block_order);

  for (b = 0 ; b < blocks ; b++) {
    const unsigned int block = block_order[b];
    for (c = 0 ; c < cols ; c++) {
      const unsigned int base = col_order[c] * blocks + block;
      for (r = 0 ; r < rows ; r++) {
        const unsigned int candidate = row_order[r] * row_stride + base;
        if (candidate < indices_count) {
          indices[written] = candidate;
          written++;
        }
      }
    }
  }

  free(row_order);
  free(col_order);
  free(block_order);
}



