#cs(module lexer mzscheme (require (lib "etc.ss")) (require (file "abs.ss")) (provide make-lexer make-file-lexer make-string-lexer) (provide (all-from (file "abs.ss"))) ;; Symbols denoting primitive functions (define PRIMITIVE (list 'number? 'function? 'arity 'list? 'null? 'cons? 'cons 'first 'rest)) ;; ref? will be added in subsequent assignments ;; Symbols that are keywords (define KEYWORD (list 'if 'then 'else 'map 'to 'let 'in 'null 'true 'false)) ;; Characters that are the lead characters of operators. (define OPERATOR (list #\+ #\- #\* #\/ #\~ #\= #\< #\> #\& #\| #\: #\!)) ;; 'ref, and '<- will be added as operators in subsequent assignments but ;; they must be treated as special cases ;; Delimiter character (define DELIMITER (list #\( #\) #\[ #\] #\, #\;)) ;; #\{ and #\} will be added in subsequent assignments ;; Whitespace character (define WHITE-SPACE (list #\newline #\space #\tab)) ;; a delimiting character (define delimiting (append DELIMITER OPERATOR WHITE-SPACE)) ;; char -> boolean ;; (digit? c) where c is a character returns true iff c is a digit (define digit? (lambda (c) (char<=? #\0 c #\9))) ;; pre: c is a character ;; char -> boolean ;; (alphanumeric? c) where c is a character returns true iff c is an alphabetic character or ;; A digit (define alphanumeric? (lambda (c) ;; pre: c is any value (and (char? c) (or (char<=? #\a c #\z) (char<=? #\A c #\Z) (digit? c) (member c '(#\_ #\?)))))) ;; (any char) -> boolean ;; (charEq? o c) returns true iff o is the same character as c ;; Note: if o is not a char (char=? o c) generates a run-time error! eof is not a char (define charEq? (lambda (o c) (and (char? o) (char=? o c)))) ;; (any -> port) -> (-> Token) ;; (make-lexer pm) takes a port maker ml and returns a function ml ;; such that (ml input) returns the stream of tokens on port (ml input) (define make-lexer (lambda (make-port) (lambda (input) (let* ([port (make-port input)] [peek (read-char port)]) (will-register (make-will-executor) port close-input-port) (local ; buffer used to hold token returned as PEEK [(define first '()) ;; -> Token ; (loop) returns next token [variable, primtive, keyword, number, or delimiter] ; in char stream (define loop (lambda () ; port starting with peek ; sets peek to first char following the quasi-token (cond [(eof-object? peek) (close-input-port port) peek] [(member peek OPERATOR) (let ([firstch peek]) (set! peek (read-char port)) (cond [(member firstch '(#\: #\! #\< #\>)) (if (or (charEq? peek #\=) ;;(and (charEq? firstch #\<) (charEq? peek #\-)) ; added in later assignments ) (begin0 (make-operator (string->symbol (string firstch peek))) (set! peek (read-char port))) (if (member firstch '(#\! #\:)) ;; #\! will be eliminated when ! becomes a legal operator (error 'make-lexer "invalid character: \"~a\"" firstch) (make-operator (string->symbol (string firstch)))))] [(and (char=? firstch #\/) (charEq? peek #\/)) ; comment found, skip to end of line (let innerloop ([ch (read-char port)]) (if (charEq? ch #\newline) (begin (set! peek (read-char port)) (loop)) (innerloop (read-char port))))] [else (make-operator (string->symbol (string firstch)))]))] [(member peek WHITE-SPACE) (set! peek (read-char port)) (loop)] [(member peek DELIMITER) (begin0 (make-delimiter (string->symbol (string peek))) (set! peek (read-char port)))] [(alphanumeric? peek) (let [(oldpeek peek)] (let loop ([chars (list peek)]) (let ([next (read-char port)]) (cond [(alphanumeric? next) (loop (cons next chars))] [else (set! peek next) (let [(s (apply string (reverse chars)))] (cond [(string->number s) => (lambda (x) x)] [(digit? oldpeek) ; identifier begins with digit (error 'make-lexer "invalid lead character \"~a\" in identifier" oldpeek)] [else (let [(symbol (string->symbol s))] (cond [(member symbol PRIMITIVE) (make-primitive-token symbol)] [(member symbol KEYWORD) (make-keyword symbol)] ;; [(eq? symbol 'ref) (make-operator symbol)] ; used only after we add imperativity [else (make-variable symbol)]))]))]))))] [else (error 'make-lexer "invalid character: \"~a\"" peek)])))] ; abstraction defining the token stream (lambda args ; args = '() means a normal read operation ; args != '() means PEEK at next symbol to be read but ; don't consume it (if (null? first) (if (null? args) (loop) (begin (set! first (loop)) first)) (if (null? args) (begin0 first (set! first '())) first)))))))) ;; string -> lexer ;; (make-file-lexer s) constructs a lexer for the contents of file s (define make-file-lexer (make-lexer (lambda (s) (open-input-file s 'text)))) ;; string -> lexer ;; (make-string-lexer s) constructs a lexer for the string s (define make-string-lexer (make-lexer open-input-string)) ;; lexer->lexer ;; (add-debugging l) modifies the lexer l so that it prints each token as it is read (define (add-debugging l) (lambda args (local [(define t (apply l args))] (printf "token ~a ~a~n" t (if (null? args) 'read 'peeked)) t))) ;; string -> lexer ;; (make-debug-file-lexer s) constructs a lexer for the contents of file s that prints ;; each token as it is read (define make-debug-file-lexer (lambda (s) (add-debugging (make-file-lexer s)))) ;; string -> lexer ;; (make-debug-string-lexer s) constructs a lexer for string s that prints each token as it is read (define make-debug-string-lexer (lambda (s) (add-debugging (make-string-lexer s)))) )