#!/usr/bin/env python # # DrScheme binary file source extractor # # Caveat: severely untested; this may work, or the little man in the box may # jump out and chop your fingers off # # - dsandler@cs.rice.edu, 9/12/05 import re, sys program = '' for line in sys.stdin.readlines(): # linefeeds schminefeeds line = re.sub(r'\015\012',r'\012',line) # this is bad line = re.sub(r'\000\000;','',line) # remove ugly characters line = re.sub(r'''[^-A-Za-z0-9()\[\]*;+=|#\/\n '"?\[\]<>.!]''','',line) # if your program has lines like # (+ 1 2);do some stuff # you will have problems line = re.sub(r'([^; ]);',r'\1 ;',line) # often (foo will be broken as (\nfoo, which is cosmetic, but 'foo will # also break as '\nfoo, which is wrong. line = re.sub(r'''(['\(\[#])\n''', r'\1', line) # the numeric tower topples over. line = re.sub(r'(\d+/\d+)decim(al1)?',r'\1', line) program += line # this is the part I'm most concerned about---how do we know where the header # garbage ends? program = re.sub(r'(?s)^.*\?\?\?\?\?\?\?\?\?\?\?\?\?\?\?\?\?','',program) sys.stdout.write(program)