This appendix summarizes the implementation of the NWCS waiting mechanism,
and separate heuristics for the SPTF (and C-LOOK) and Stride schedulers.
This should facilitate easy implementation and deployment of NWCS in current
operating systems. This does not include pseudocode for measuring disk
characteristics and building an approximate positioning time calculator. For
even greater detail, please refer to our prototype implementation. The
source code is made publicly available on the project webpage, hosted
at [ID01a].
1.0
// sched_{enqueue,dequeue,finish,collect_stats,evaluate}
// are the scheduler-specific functions described later
enum STATE { WAIT = -1, IDLE = 0, BUSY = 1,2,3,... } state = IDLE;
integer pending = 0;
boolean expired = false;
// these four functions should set the higher interrupt priority level
// of the timer (splclock or splhigh), rather than of disk I/O (splbio)
issue(new):
// gets invoked by upper layers of the kernel
// state == IDLE or BUSY or WAIT
if (new.is_read_request) sched_collect_stats(new);
sched_enqueue(new);
pending++;
if (new.is_read_request && (new.is_sync_request || state == WAIT))
new.proc.blocked = true; // little hope in waiting for this process
if (state == IDLE || state == WAIT) schedule();
schedule():
// internal function. sometimes takes a request from the pool and
// services it; otherwise just returns with the timeout activated.
// called with pending > 0
static request last, next; // retain values across calls
if (expired)
| // state == WAIT
| expired = false;
| next = sched_choose(); // need this for SCSI only
else if (last.proc.blocked)
| // state == WAIT
| next = sched_choose();
else
| // state == IDLE or WAIT, or BUSY if SCSI
| next = sched_choose()
| duration = next.is_read_request ?
| sched_evaluate(last, next) : 0;
| if (duration > 0)
| | if (state == IDLE)
| | state = WAIT;
| | set_timeout(duration);
| | else
| | // do nothing; in particular, don't retrigger timeout
|_ |_ return; // "next" contains the chosen candidate
next = sched_choose(); // probably need this for SCSI
sched_dequeue(next);
pending--;
if (state == WAIT) { cancel_timeout(); state = IDLE; }
state++; // idle->busy, busy->busier
last = next; next = NULL;
PERFORM_DISK_IO(last); // run request on disk
timeout_expire():
// gets called by timeout code, in interrupt context
// state == WAIT, pending > 0, next != NULL
expired = true;
schedule();
finish(req):
// gets called by disk driver when current finishes execution
// state == BUSY
state--; // busy:1->idle, busier->busy
sched_finish(req);
req.proc.prev_finish_time = NOW;
if (pending) schedule();
1.5
sched_collect_stats(new):
// every process has:
// buckets:[0..N] with N = 30 (i.e. least count = 500us)
// expected_thinktime, expected_most_thinktime
// expected_seek_distance, expected_seek_direction
// expected_positioning_time
if (req \in scheduler_queue
&& req.is_read_request && req.proc == new.proc)
thinktime = 0
else
thinktime = min(15ms, NOW - new.proc.prev_finish_time);
// the process issuing "new" computes for some time
// between its finish time for previous request and NOW
loop:i:(0..N) new.proc.bucket[i] *= 0.9 // decay all buckets
new.proc.bucket[(thinktime/15ms) * N] += 1; // increment this bucket
new.proc.expected_thinktime = median(new.proc.buckets)
new.proc.expected_most_thinktime = 95percentile(new.proc.buckets)
seekdist = ABS(new.location - new.proc.prev_location)
seekdirection = (new.location > new.proc.prev_location) ? 1 : 0;
new.proc.prev_location = new.location; // update prev_location
new.proc.expected_seek_distance *= 0.74
new.proc.expected_seek_distance += seekdist
new.proc.expected_seek_direction *= 0.74 // used by C-LOOK heuristic
new.proc.expected_seek_direction += seekdirection
new.proc.expected_positioning_time =
calculate_postime(new.proc.expected_seek_distance)
// we use an approximation of positioning time based on seek
// distance; an exact version would need some more arguments
sched_evaluate(last, next):
next_seek_distance = ABS(next.location - last.location)
next_seek_direction = next.location > last.location;
next_positioning_time = calculate_postime(next_seek_distance)
||<-- ADDITIONAL CHECK FOR THE C-LOOK HEURISTIC ONLY
||
|| if (next_seek_direction == 1 /* ahead */
|| | && last.proc.expected_seek_direction < 0.3 /* behind */)
|| |____ return 0 // proceed immediately
||
|| if (next_seek_direction == 0 /* behind */
|| | && last.proc.expected_seek_direction > 0.7 /* ahead */)
|| |____ return max(last.proc.expected_most_thinktime - elapsed, 0)
// time elapsed since we finished servicing previous request.
// this time needs to be deducted from both expected_thinktimes.
elapsed = NOW - last.proc.prev_finish_time;
last_expected_time = last.proc.expected_positioning_time +
max(last.proc.expected_thinktime - elapsed, 0)
if (next_positioning_time < last_expected_time)
| return 0; // no point waiting; proceed immediately with "next"
else
| return max(last.proc.expected_most_thinktime - elapsed, 0)
| // If we wait for 95 percentile of thinktime, then with 95%
| // probability, we would expect to receive the next request
|____ // before we timeout. This time is bounded by 15ms.
1.5
sched_collect_stats(new): reduced version of (or same as) that for SPTF
sched_evaluate(last, next): // PROCESS VERSION
minclock = min{ proc[i].clock, where (proc[i].pending > 0) }
elapsed = NOW - last.proc.prev_finish_time;
if (last.proc.pending == 0
| && last.proc.clock < minclock
| && max(last.proc.expected_thinktime - elapsed, 0) < 3ms)
| return max(last.proc.expected_thinktime - elapsed, 0)
else
|____ return 0;
sched_evaluate(last, next): // RESOURCE CONTAINER VERSION
minclock = min{ rc[i].clock, where (rc[i].pending > 0) }
elapsed = NOW - last.proc.prev_finish_time;
if (last.proc.rc.pending == 0
| && last.proc.rc.clock < minclock
| && max(last.proc.expected_thinktime - elapsed, 0) < 3ms)
| return max(last.proc.expected_thinktime - elapsed, 0)
else
|____ return 0;
1.5