BEGIN {
    # State for the per-record jump scan:
    #   found_it - 0 while no jump has been seen; otherwise the record
    #              number just before the first jump
    #   first    - 1 until the first record has seeded the baseline
    found_it = 0
    first = 1
}


# Remember the second field of every record, indexed by record number,
# so the END rule can post-process the whole series.
{ measurements[NR] = $2 }

# Streaming scan, active only until a jump has been recorded: walk the
# records in order, tracking the previous value in `last`.  The first
# record whose value is not below 115% of `last` ends the scan, and
# found_it is set to the number of the record just before it.
found_it == 0 {
    if (first == 1) {
        # first record: nothing to compare against yet, just seed `last`
        first = 0;
        last = $2;
    } else if ($2 < (last * 1.15)) {
        # still within 15% of the previous value: keep following the series
        last = $2;
    } else {
        found_it = NR - 1;
    }
}

END{
    # Post-process the collected series and print one result line:
    #   <estimate> TAB <7th "_"-piece of FILENAME> TAB <8th "_"-piece>
    # The estimate is found_it2 (computed below) when the variable
    # `version` equals the string "2", and found_it (computed by the
    # per-record rule) otherwise.  `version` is not set anywhere in this
    # script -- presumably it is supplied on the command line
    # (e.g. -v version=2); TODO confirm against the caller.

    # we hit this funny special case where on some machines, the second
    # time is less than the first; since this implies that they have at
    # least two functional units, we lose nothing by simply setting the
    # first value to be equal to the second
    # (the NR guard keeps a single-record input from comparing against,
    # and then copying, a nonexistent second measurement)
    if (NR >= 2 && measurements[1] > measurements[2])
        measurements[1] = measurements[2];

    # smooth the data: an interior point that differs from its left
    # neighbour by more than epsilon, while the left and right neighbours
    # agree to within epsilon, is treated as an outlier and replaced by
    # the neighbours' average.  The pass is in place, so each point is
    # compared against an already-smoothed left neighbour.
    # epsilon is 10% of the first measurement (the original author also
    # questioned this choice: "WHY 10%?!?"); measurements[1] is never
    # modified by the loop, so it is computed once up front.
    epsilon = .10*measurements[1];
    for(i=2;i<NR;i++)
    {
        delta_a_b = measurements[i]-measurements[i-1];
        delta_a_c = measurements[i+1]-measurements[i-1];
        if (delta_a_b < 0)
            delta_a_b = -delta_a_b;
        if (delta_a_c < 0)
            delta_a_c = -delta_a_c;
        if (delta_a_b > epsilon && delta_a_c <= epsilon)
            measurements[i] = (measurements[i-1]+measurements[i+1])/2;
    }

    # we need to find the place where there's an obvious jump in the
    # times -- imagine that a machine had only one functional unit; when
    # we went to two streams, the time ought to jump by about double; and
    # if the machine has three units, then the times for one stream and
    # two streams ought to be fairly close, and the time for three streams
    # ought to be about a third higher; in the general case, for a
    # machine with F functional units, where a single stream takes time T,
    # the equation to describe the expected time for S streams (S > 1)
    # is: T if S <= F, T*(S/F) otherwise; thus, the change in time when
    # we have more streams than functional units is S/F; thus, if we
    # have the same time for 1, 2, and 3 streams, when we compare the
    # time of 4 streams vs. three streams, the delta has to be less
    # than (4/3-1)T = 1/3T; the same calculation holds for five streams:
    # if four streams run about as fast as a single stream, then we
    # can see that four is the maximum number of streams if the time for
    # five streams is more than 5/4T
    # the trick, here, is that as S becomes large (>5, e.g.), the delta
    # gets close to the timer error; we can fix this by looking at the
    # next couple (x) of entries, since their deltas will be (S+x)/F, and
    # bigger values of (S+x)/F will be much larger than the timer error
    #
    # NOTE(review): the look-ahead described in the last sentence is not
    # implemented below; only the current entry is tested.
    # NOTE(review): the scan stops at streams < NR, so the final
    # measurement is never tested, and found_it2 stays 0 when no jump is
    # seen -- confirm both are intended before changing them.
    found_it2 = 0;
    initial_time = measurements[1];
    functional_units = 1;
    for(streams=2;streams<NR;streams++)
    {
        if (measurements[streams] < (initial_time*streams/functional_units))
            functional_units++;
        else
        {
            # first entry at or above T*S/F: the unit count seen so far
            # is the answer, and nothing later can change it
            found_it2 = functional_units;
            break;
        }
    }

    split(FILENAME, filename_pieces, "_");
    if (version == "2")
        printf "%s\t%s\t%s\n", found_it2, filename_pieces[7], filename_pieces[8];
    else
        printf "%s\t%s\t%s\n", found_it, filename_pieces[7], filename_pieces[8];
}
