/*
 * Disk request scheduler hook module.
 *
 * Installs enqueue/dequeue/hasany/iodone callbacks through the ext_* function
 * pointers, delegates queue policy to the sched_* routines supplied by
 * "seek.h" or "prop.h", optionally holds back dispatch behind a timer
 * (NWCS), prints once-per-second throughput statistics, and registers a
 * small control syscall.
 */
#include "config.h"

#ifndef BUCKETDECAY
#define BUCKETDECAY 90
#endif

#define SEEK 1
#define PROP 2

/* can have STRIDE and SPTF defined, for hybrid policy */
#if defined(STRIDE) || defined(YFQ) || defined(LOTTERY)
# define SCHED PROP
#elif defined(CLOOK) || defined(SPTF) || defined(SSTF)
# define SCHED SEEK
#else
# error define some scheduler
#endif

#if defined(YFQ) || defined(STRIDE)
# define FQ
#endif

/*
 * Normalize the feature switches to 0/1 so they can be used in ordinary
 * C expressions.  Note that defining noNWCS also defines NWCS, so NWCS
 * ends up 1 in that configuration.
 */
#if defined(noNWCS)
# undef noNWCS
# define noNWCS 1
# define NWCS 1
#else
# define noNWCS 0
#endif

#if defined(NWCS)
# undef NWCS
# define NWCS 1
#else
# define NWCS 0
#endif

#if defined(onlyTIMER)
# undef onlyTIMER
# define onlyTIMER 1
#else
# define onlyTIMER 0
#endif

/*
 * NOTE(review): the header names of the following 22 directives are missing
 * from the text under review (only the bare "#include" tokens survive).
 * They are kept as placeholders; restore the names from the original file.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "timer.h"

/*
 * Non-fatal assertion: only logs.  Wrapped in do/while(0) so that
 * "assert(x);" is a single statement and cannot capture a following else.
 */
#define assert(x) \
    do { \
        if (!(x)) \
            printf("assert failed %s:%d\n", __FILE__ , __LINE__); \
    } while (0)

#if 1
# define tprintf(s, x...) do { } while(0)
#else
# define ELAPSED ({ quad_t t = rdtsc(), x = (t-mark)/MHz; mark = t; x; })
# define tprintf(s, x...) printf("%lld: " s, ELAPSED , ## x)
#endif

/*
 * p(): trace line prefixed with the time since the previous trace and a
 * one-character state tag ('w' = WAIT, 'i' = IDLE, digit = busy count).
 * Expands to nothing unless DEBUG is defined.
 */
#ifdef DEBUG
# define ELAPSED ({ int t = NOW, x = t - mark; mark = t; x; })
# define p(s,x...) printf("%d %c\t" s "\n", ELAPSED, (state<0?'w':state?'0'+state:'i') , ## x)
#else
# define p(s,x...) do { } while(0)
#endif

/* NULL-tolerant accessors for struct buf (arguments fully parenthesized) */
#define size(bp) ((bp) ? (bp)->b_bcount : -1)
#define sizesect(bp) ((size(bp)+511)/512)
#define id(bp) ((bp) ? (bp)->b_id : -1)
#define isrw(bp) ((bp)->b_flags & B_READ ? 'r' : 'w')
#define isasync(bp) ((bp)->b_flags & B_ASYNC ? 'a' : ' ')
#define pos(bp) ((bp) ? (int)(bp)->b_pblkno : -1)
#define bproc(bp) ((bp) ? (bp)->nwcs_bproc : 0)
#define bpid(bp) (bproc(bp) ? _id(bproc(bp)->p_pid) : '-')
/* per-process scheduler record, stored in the proc's ext_data field */
#define pinfo(p) ((struct info *)((p)->ext_data))
/* NOTE: bpinfo() dereferences bproc(bp); callers must check bproc(bp) first */
#define bpinfo(bp) (*pinfo(bproc(bp)))

void timeout_expire(void);

/*
 * Map an integer (callers pass a pid) to a stable letter 'a'..'z'.
 * Letters are handed out in first-seen order; once 26 distinct values have
 * been seen, any new value maps to '-'.
 */
char
_id(int x)
{
    static int table[26];
    static int n = 0;
    int i;

    for (i = 0; i < n; i++) {
        if (table[i] == x)
            return 'a' + i;
    }
    if (n == 26)
        return '-';
    table[n] = x;
    return 'a' + n++;
}

/* counters for the current measure_func() interval */
static quad_t tput = 0, ktput = 0, util = 0;
static int nreq = 0;
/* sums over the [FROM, TO) measurement window and the number of samples */
static int m_tput = 0, m_ktput = 0, m_util = 0, m_n = 0;
static int m_done = 0, m_start = 0;
/* processes registered (syscall command 3) to be killed when measuring ends */
static struct proc *m_killproc[10] = {0};
int n_killproc = 0;
static quad_t t_lastiodone = 0; /* for scsi */

/* average of a window sum; yields 0 instead of dividing by an empty window */
#define m_avg(sum) (m_n ? (sum) / m_n : 0)

quad_t mark = 0;
int requestnum = 0;
volatile int pending = 0;
enum STATE { WAIT = -1, IDLE = 0 /*, BUSY = 1,2,3.. */ } state = IDLE;
/*
 * next: request chosen for dispatch; last: points at _last, a copy of the
 * most recently dispatched request (see dequeue()).
 */
volatile struct buf *next = 0, _last, *last = 0;
volatile int expired = 0;
int asptf_switch = 0;

extern void (*ext_enqueue)(struct buf_queue_head *, struct buf *);
extern void (*ext_iodone)(struct buf *);
extern struct buf *(*ext_dequeue)(struct buf_queue_head *);
extern int (*ext_hasany)(struct buf_queue_head *);
extern void (*disk_doIO)(void*);
extern void _bufqdisksort(struct buf_queue_head *, struct buf *);
extern int force_dequeue_now;

#define oenq _bufqdisksort
#define NOW (rdtsc()/MHz)
#define MAXDIST 8000

/* Per-process scheduler record (reached through pinfo()). */
struct info {
    quad_t bucket[MAXTIME/TIMEGRAIN];
    quad_t expected_seek_distance;
    int expected_seek_direction;
    quad_t expected_thinktime;
    quad_t expected_most_thinktime;
    quad_t expected_positioning_time;
    int prev_location;
    quad_t prev_finish_time;    /* set to NOW in iodone() */
    int blocked;                /* set in enqueue(), cleared in iodone() */
    int rc;                     /* container index, set by syscall command 2 */
};

const int decay3 = 37;  /* decay 95% in 3 calls */
const int decay5 = 55;  /* decay 95% in 5 calls */
const int decay10 = 74; /* decay 95% in 10 calls */
const int decay20 = 86; /* decay 95% in 20 calls */

#ifndef DECAY
#define DECAY decay10
#endif

#define DISTSCALE 100

#if SCHED == SEEK
#include "seek.h"
#else
#include "prop.h"
#endif

/*
 * Convert a block number into an index of the 40-character map drawn by
 * out().  The result is clamped to 0..39 so that block numbers outside the
 * range the map was sized for cannot index past the buffer.
 */
static int
out_slot(int blkno)
{
    const int sizeperdash = (20*1024*1024*2)/40;
    int slot = blkno / sizeperdash;

    if (slot < 0)
        return 0;
    if (slot > 39)
        return 39;
    return slot;
}

/*
 * Trace helper: draw a 40-character map of the queued requests and log it
 * through p() together with "cmd" and, when given, the details of "bp".
 *
 * Map legend: '-' empty slot, a letter = id of the single queued request's
 * process, '+' more than one request in the slot.  The slot of "bp" is
 * upper-cased when it holds a letter and shown as '~' otherwise.
 * When DEBUG is not defined p() expands to nothing, so the map is built and
 * discarded.
 */
void
out(struct buf *bp, char *cmd)
{
    struct buf *b;
    char s[41];
    int i;

    for (i = 0; i < 40; i++)
        s[i] = '-';
    s[40] = 0;

    sched_foreach(b) {
        int n = out_slot(pos(b));

        /* bpid() tolerates requests that have no owning process */
        if (s[n] == '-')
            s[n] = bpid(b);
        else
            s[n] = '+';
    }

    if (bp) {
        int off = out_slot(pos(bp));

        if (s[off] >= 'a' && s[off] <= 'z')
            s[off] += 'A' - 'a';
        else
            s[off] = '~';
        p("%s(%c)\t%d %c%c @%d/%ld \t%s", cmd, bpid(bp), id(bp), isrw(bp), isasync(bp), pos(bp), sizesect(bp), s);
    } else {
        p("%s\t\t\t\t\t%s", cmd, s);
    }
}

/* static char tmp_lastnew; */

/* change splbio's to splhigh's in ata/ata-disk.c and scsi/scsi_da.c */
/* also splcam -> splhigh? not sure */

/*
 * ext_enqueue hook.  Requests for any device other than MAJOR/UNIT are
 * passed straight to the stock sorter (oenq).  Requests for the managed
 * device are tagged with a sequence number, their queue and the issuing
 * process, then handed to the scheduler.
 *
 * If a measurement run has just completed (m_done), this is also where the
 * averages are reported via uprintf() and the registered processes are sent
 * SIGKILL.
 */
static void
enqueue(struct buf_queue_head *bufq, struct buf *new)
{
    if (major(new->b_dev) != MAJOR || dkunit(new->b_dev) != UNIT) {
        oenq(bufq, new);
        return;
    }

    /* printf("enq %d\n", curproc ? curproc->p_pid : -1); */
#if 0
    struct buf *bp;
    sched_foreach(bp) {
        uprintf("%c/%d/%lld:%lld/%lld ", bpid(bp), id(bp), (ll_microtime() - bp->b_t_enqueue), (ll_microtime()), (bp->b_t_enqueue));
    }
#endif

    if (m_done) {
        int i;

        uprintf("-----! snitch !----- avg %d %d %d\n", m_avg(m_tput), m_avg(m_ktput), m_avg(m_util));
        for (i = 0; i < n_killproc; i++) {
            if (m_killproc[i])
                psignal(m_killproc[i], SIGKILL);
        }
        n_killproc = 0;
        m_done = 0;
    }

    new->b_id = (++requestnum);     /* non-zero id marks the buf as ours, see iodone() */
    new->bufq = bufq;
    new->nwcs_bproc = curproc;
    /* tmp_lastnew = bpid(new); */
    /* uprintf("new:%c/%d\n", bpid(new), id(new)); */

    /* if (NWCS && (new->b_flags & B_READ)) */
    if (NWCS)
        sched_collect_stats(new);
    sched_enqueue(new);
    out(new, "enq");
    pending++;

    /* .. and nothing till now .. */
    /* a read marks its process as blocked if it is synchronous, or if the
     * scheduler is currently holding back dispatch (WAIT) */
    if (bproc(new) && (new->b_flags & B_READ) &&
        (!(new->b_flags & B_ASYNC) || state == WAIT))
        bpinfo(new).blocked = 1;
}

/*
 * Decide whether a request should be dispatched now.
 *
 * Returns 0 when nothing is pending or when dispatch is being held back
 * behind the timeout, 1 when "next" holds a request to dispatch.
 *
 * NOTE: despite the original "no side effects" remark this routine assigns
 * "next", and in the last branch may set state to WAIT and arm the timeout.
 * Callers are expected to hold splhigh() (see hasany() and dequeue()).
 */
static int                              /* called from interrupt context */
_hasany(struct buf_queue_head *bufq)    /* no side effects */
{
    int duration;

    if (pending == 0) {
        p(" --------- empty");
        return 0;
        /* outside schedule() */
    /* } else if (state > 0) { */
    /*     p(" --------- busy %d\n", state); */
    /*     XXX disables tagged queueing */
    /*     return 0; */
    } else if (expired) {
        struct buf *tmpnext;

        assert(state == WAIT);
        /* "next" already contains the request to be scheduled */
        tmpnext = sched_select();
        assert(next == tmpnext);
        next = tmpnext;
        /* expired = 0; */
        out((struct buf *) next, "exp");
    } else if (last && bproc(last) && bpinfo(last).blocked) {
        /* the process of the last dispatched request is marked blocked */
        next = sched_select();
        out((struct buf *) next, "lpb");
    } else if (last && bproc(last) && already_exists_in_q(bproc(last))) {
        /* the process of the last dispatched request already has one queued */
        next = sched_select();
        out((struct buf *) next, "inq");
    } else {
        /* assert(state == WAIT || state == IDLE); for IDE */
        /* anything for SCSI */
        asptf_switch = 0;
        next = sched_select();
        /* XXX flags.. */
        out((struct buf *) next, "sel");

        /* only reads are candidates for being held back */
        if (NWCS && !force_dequeue_now && !asptf_switch &&
            (next->b_flags & B_READ))
            duration = sched_evaluate(last, next);
        else
            duration = 0;
        /* if (asptf_switch) bpinfo(next).skiponestats = 1; */
        asptf_switch = 0;

        if (duration > 0) {
            if (state == IDLE) {
                state = WAIT;
                p("setting timeout %d", duration);
                set_timeout(duration, timeout_expire);
            } else {
                /* WAIT, or for SCSI, BUSY */
                /* don't retrigger timeout */
                ;
            }
            return 0;
        }
    }
    return 1;
}

/* ext_hasany hook: _hasany() evaluated between splhigh() and splx(). */
static int                              /* called from interrupt context */
hasany(struct buf_queue_head *bufq)     /* no side effects */
{
    int s = splhigh();
    int x = _hasany(bufq);

    splx(s);
    return x;
}

/*
 * ext_dequeue hook: return the request to dispatch, or NULL when _hasany()
 * reports that nothing should be dispatched now.
 *
 * On success the request is removed from the scheduler, any armed timeout
 * is cancelled, the busy count in "state" is incremented, and a copy of the
 * request is kept in _last (reachable through "last").
 */
static struct buf *                     /* called from interrupt context */
dequeue(struct buf_queue_head *bufq)
{
    int s = splhigh();
    struct buf *bp;
    struct buf *tmpnext;

    if (_hasany(bufq) == 0) {
        splx(s);
        return NULL;
    }

    /* "next" already contains the request to be scheduled */
    tmpnext = sched_select();
    assert(next == tmpnext);
    next = tmpnext;
    if (expired)
        expired = 0;

    out((struct buf *) next, "srv");
    sched_dequeue((struct buf *) next);
    pending--;

    if (state == WAIT) {
        p("cancel timeout");
        cancel_timeout();
        state = IDLE;
    }
    state++; /* -> BUSY */

    bp = (struct buf *) next;
    /* snapshot the request; "last" never points at the live buf */
    last = &_last;
    memcpy((void*)last, (void*)next, sizeof(struct buf));
    next = NULL;

    splx(s);
    return bp;
}

/*
 * Timeout callback armed by _hasany(): flag the expiry and call disk_doIO
 * so the held-back request gets dispatched.
 */
void
timeout_expire(void)
{
    assert(state == WAIT);
    expired = 1;
    p("expire timeout");
    assert(pending > 0);
    if (!disk_doIO)
        printf("ERROR: disk_doIO NULL\n");
    else
        disk_doIO((void*)0);
}

/*
 * ext_iodone hook: account for a completed request.
 *
 * Buffers whose b_id is 0 were not tagged by enqueue() (or were already
 * completed) and are ignored.
 */
static void                             /* called from interrupt context */
iodone(struct buf *bp)
{
    int s = splhigh();
    quad_t t, u;

    if (bp->b_id == 0) {
        splx(s);
        return;
    }

    out(bp, "iod");
    bp->b_id = 0;
    assert(state > 0 /* == BUSY */);

    /* bproc(bp) is NULL when enqueue() ran without a curproc; bpinfo()
     * must not be evaluated in that case */
    if (bproc(bp) && (bp->b_flags & B_READ))
        bpinfo(bp).blocked = 0;

    sched_iodone(bp);

    ktput += (t = bp->b_bcount);
    /* count only the time not already attributed to the previous completion */
    util += (u = bp->b_t_iodone - qmax(bp->b_t_iostart, t_lastiodone));
    t_lastiodone = bp->b_t_iodone;
#if SCHED == PROP
    RC(bp).ktput += t;
    RC(bp).util += u;
#endif

    state--;
    if (bproc(bp))
        bpinfo(bp).prev_finish_time = NOW;

    /* #ifdef XXX_SCSI */
    /* if (!disk_doIO) printf("ERROR: disk_doIO NULL\n"); */
    /* else disk_doIO((void*)1); */
    /* #endif */
    splx(s);
}

extern struct malloc_type M_SUBPROC[1];

/*
 * at_fork callback (also called directly from init() for every existing
 * process): allocate and zero the child's struct info, seed its estimates,
 * and inherit the parent's container index when the parent has a record.
 * "flags" is unused.
 */
static void
fork_pinfo(struct proc *parent, struct proc *child, int flags)
{
    struct proc *p = child;

    if (p == NULL) {
        printf("crapadoodle\n");
        return;
    }

    MALLOC(pinfo(p), struct info *, sizeof(struct info), M_SUBPROC, M_WAITOK);
    if (pinfo(p) == NULL) {
        printf("crapadooodle\n");
        return;
    }
    bzero((char*)pinfo(p), sizeof(struct info));

    /* pinfo(p)->expected_seek_distance = MAXDIST; */
    /* pinfo(p)->expected_thinktime = MAXTIME; */
    /* pinfo(p)->expected_most_thinktime = MAXTIME; */
    pinfo(p)->expected_seek_distance = 1;
    pinfo(p)->expected_thinktime = 1;
    pinfo(p)->expected_most_thinktime = MAXTIME;

    if (parent && pinfo(parent))
        pinfo(p)->rc = pinfo(parent)->rc;
}

/*
 * at_exit callback (also called from deinit() for every process): drop the
 * process from the kill list and free its struct info.
 */
static void
exit_pinfo(struct proc *p)
{
    int i;

    for (i = 0; i < n_killproc; i++) {
        if (m_killproc[i] == p)
            m_killproc[i] = NULL;
    }
    /* compact: trim trailing empty entries */
    while (n_killproc > 0 && m_killproc[n_killproc-1] == NULL)
        n_killproc--;

    FREE(pinfo(p), M_SUBPROC);
}

static struct callout_handle measure_handle;
int nmeasure = 0;

/*
 * Periodic statistics callback; re-arms itself with timeout(..., hz).
 *
 * While a run is active (m_start) it prints the interval counters, adds
 * them to the window sums for intervals FROM <= nmeasure < TO, and at
 * nmeasure == TO prints the averages and flags the run as done.
 * The interval counters are reset on every call.
 */
static void
measure_func(void *arg)
{
    int s = splhigh();
#if SCHED == PROP
    int i;
#endif

    if (m_start) {
#if SCHED == PROP
        for (i = 0; i < NRC; i++) {
            printf(" %c %d %lld %lld %lld ", 'a'+i, rc[i].share/100, rc[i].tput/1024, rc[i].ktput/1024, (rc[i].util+500)/1000);
        }
        printf("total ");
#endif
        printf("%lld %lld %lld %d\n", tput/1024, ktput/1024, (util+500)/1000, nreq);

        if (nmeasure >= FROM && nmeasure < TO) {
            m_tput += tput/1024;
            m_ktput += ktput/1024;
            m_util += (util+500)/1000;
            m_n++;
#if SCHED == PROP
            for (i = 0; i < NRC; i++) {
                rc[i].m_tput += rc[i].tput / 1024;
                rc[i].m_ktput += rc[i].ktput / 1024;
                rc[i].m_util += (rc[i].util+500)/1000;
            }
#endif
        } else if (nmeasure == TO) {
            /* m_avg() guards against an empty window (m_n == 0) */
            printf("avg ");
#if SCHED == PROP
            for (i = 0; i < NRC; i++) {
                printf(" %c %d %d %d %d ", 'a'+i, rc[i].share/100, m_avg(rc[i].m_tput), m_avg(rc[i].m_ktput), m_avg(rc[i].m_util));
            }
            printf("total ");
#endif
            printf("%d %d %d %d\n", m_avg(m_tput), m_avg(m_ktput), m_avg(m_util), nreq);
            m_done = 1;
            m_start = 0;
        }
        nmeasure++;
    }

    ktput = tput = util = 0;
#if SCHED == PROP
    for (i = 0; i < NRC; i++)
        rc[i].tput = rc[i].ktput = rc[i].util = 0;
#endif

#if 0
    {
        struct buf *bp;
        int i;
        if (n_killproc) {
            for (i = 0; i < n_killproc; i++)
                if (m_killproc[i])
                    printf("%c %lld %lld ", _id(m_killproc[i]->p_pid), pinfo(m_killproc[i])->expected_thinktime, pinfo(m_killproc[i])->expected_most_thinktime);
            printf("bum\n");
        }
        printf("%c a:%lld ", tmp_lastnew, tmp_a_thinktime);
        sched_foreach(bp) {
            printf("%c/%d/%lld-%lld.%lld.%lld ", bpid(bp), id(bp), (ll_microtime() - bp->b_t_enqueue), bpinfo(bp).expected_seek_distance, bpinfo(bp).expected_thinktime, bpinfo(bp).expected_most_thinktime);
        }
        printf("\n");
    }
#endif

    measure_handle = timeout(measure_func, 0, hz);
    splx(s);
}

extern u_int32_t cam_dflags;

/*
 * MOD_LOAD: give every existing process a struct info, register the
 * fork/exit callbacks, initialise the scheduler, install the ext_* hooks,
 * then start the timer facility (when NWCS or onlyTIMER) and the periodic
 * measurement callback.
 */
static void
init(void)
{
    struct proc *p;
    int s;

    determine_MHz();
    printf("------------------------------------------\n");

    s = splhigh();
    LIST_FOREACH(p, &allproc, p_list)
        fork_pinfo(0, p, 0);
    at_fork(fork_pinfo);
    at_exit(exit_pinfo);
    sched_init();
    ext_dequeue = dequeue;
    ext_hasany = hasany;
    ext_enqueue = enqueue;
    ext_iodone = iodone;
    splx(s);

    if (NWCS || onlyTIMER)
        timeout_init(1000000/TIMEGRAIN);
    measure_handle = timeout(measure_func, 0, hz);
    mark = rdtsc();
#ifdef DI_CAM_DEBUG
    cam_dflags = 0xff;
#endif
}

/*
 * MOD_UNLOAD: undo init().  Remove the hooks, hand every request still held
 * by the scheduler back to the stock sorter on its original queue, free the
 * per-process records, unregister the callbacks and stop the measurement
 * callback.
 */
static void
deinit(void)
{
    struct buf *bp;
    struct proc *p;
    int s;

#ifdef DI_CAM_DEBUG
    cam_dflags = 0;
#endif
    if (NWCS || onlyTIMER)
        timeout_deinit();

    s = splhigh();
    ext_dequeue = NULL;
    ext_hasany = NULL;
    ext_enqueue = NULL;
    ext_iodone = NULL;

    while ((bp = sched_select()) != 0) {
        sched_dequeue(bp);
        oenq(bp->bufq, bp);
    }

    LIST_FOREACH(p, &allproc, p_list)
        exit_pinfo(p);
    if (rm_at_fork(fork_pinfo) == 0)
        printf("at_fork not removed\n");
    if (rm_at_exit(exit_pinfo) == 0)
        printf("at_exit not removed\n");
    splx(s);

    untimeout(measure_func, 0, measure_handle);
    if (curproc)
        sync(curproc, NULL);
}

/* Module event handler: always returns 0. */
static int
modevent(struct module *mod, int type, void *arg)
{
    switch (type) {
    case MOD_LOAD:
        init();
        break;
    case MOD_UNLOAD:
        deinit();
        break;
    case MOD_SHUTDOWN:
        break;
    default:
        break;
    }
    return 0;
}

/*
 * Control syscall.  uap[0] selects the command, uap[1] is its argument:
 *   0  start a measurement run
 *   1  add uap[1] to the throughput counters and count one request
 *   2  bind the calling process to container uap[1]
 *      (EINVAL unless 0 <= uap[1] < NRC and the process has a record)
 *   3  register the calling process to be killed when the run completes
 *   4  (APP_NOTIFYBLOCKED only) mark the caller blocked and, if dispatch is
 *      being held back, force it
 *   5  return the caller's expected_most_thinktime + 1000 in p_retval[0]
 * Unknown commands are ignored.  Returns 0, or EINVAL as noted.
 */
static int
syscall_func(struct proc *p, int *uap)
{
    p->p_retval[0] = 0;

    switch (uap[0]) {
    case 0:
        m_start = 1;
        break;

    case 1:
        tput += uap[1];
        nreq ++;
#if SCHED == PROP
        rc[p && pinfo(p) && pinfo(p)->rc >= 0 && pinfo(p)->rc < NRC ? pinfo(p)->rc : 0].tput += uap[1];
#endif
        break;

    case 2:
        /* negative indices are rejected too: rc is used to index rc[] */
        if (p && pinfo(p) && uap[1] >= 0 && uap[1] < NRC)
            pinfo(p)->rc = uap[1];
        else
            return EINVAL;
        /* report the container that was bound (uap[1]), not the command */
        printf("binding %s to container #%d\n", p ? p->p_comm : "", uap[1]);
        break;

    case 3:
        if (n_killproc < (int)(sizeof(m_killproc) / sizeof(m_killproc[0])))
            m_killproc[n_killproc++] = p;
        break;

    case 4:
#ifdef APP_NOTIFYBLOCKED
        pinfo(p)->blocked = 1;
        p("%c blocked\n", _id(p->p_pid));
        if (state == WAIT && pending > 0) {
            /* force timeout cancel */
            expired = 1;
            if (!disk_doIO)
                printf("ERROR: disk_doIO NULL\n");
            else
                disk_doIO((void*)2);
        }
#endif
        break;

    case 5:
        p->p_retval[0] = pinfo(p)->expected_most_thinktime + 1000;
        break;

    default:
        break;
    }
    return 0;
}

#define syscall_number 216

static struct sysent newent = { 2, (sy_call_t *) syscall_func };
static int syscallnumber = syscall_number;

SYSCALL_MODULE(KMOD, &syscallnumber, &newent, modevent, NULL);