/* Lexer for Assignment 1, Comp 411, September 2012 * Notes: * 1. This lexer will be slightly refactored in Assignment 2 to accommodate visitor based dispatch on the operator in * UnOpApp or BinOppApp. * 2. The naive definitions of the toString() methods in the AST classes could be rewritten (in much uglier form) using * StringBuilder. Naive string concatenation is very slow because it recopies its first argument. The * compiler should optimize these computations using StringBuilder but apparently does not. */ import java.io.*; import java.util.*; /** Token and AST class definitions */ /** The AST type which support a visitor interface. * AST ::= BoolConstant | IntConstant | NullConstant | Variable | PrimFun | UnOpApp | BinOpApp | App | MapLiteral | If | Let */ interface AST { public T accept(ASTVisitor v); } /** Visitor class for general AST type */ interface ASTVisitor { T forBoolConstant(BoolConstant b); T forIntConstant(IntConstant i); T forEmptyConstant(EmptyConstant n); T forVariable(Variable v); T forPrimFun(PrimFun f); T forUnOpApp(UnOpApp u); T forBinOpApp(BinOpApp b); T forApp(App a); T forMapLiteral(MapLiteral m); T forIf(If i); T forLet(Let l); } /** Jam term AST type */ interface Term extends AST { public T accept(ASTVisitor v); } /** Jam constant type */ interface Constant extends Term { public T accept(ASTVisitor v); } /** Jam token type */ interface Token {} /** Jam Boolean constant class */ class BoolConstant implements Token, Constant { private boolean value; private BoolConstant(boolean b) { value = b; } // ** singleton pattern ** public static final BoolConstant FALSE = new BoolConstant(false); public static final BoolConstant TRUE = new BoolConstant(true); public boolean getValue() { return value; } public T accept(ASTVisitor v) { return v.forBoolConstant(this); } public String toString() { return String.valueOf(value); } } /** Jam integer constant class */ class IntConstant implements Token, Constant { private int value; IntConstant(int i) { value = i; } // duplicates can occur! public int getValue() { return value; } public T accept(ASTVisitor v) { return v.forIntConstant(this); } public String toString() { return String.valueOf(value); } } /** Jam null constant class, which is a singleton */ class EmptyConstant implements Token, Constant { public static final EmptyConstant ONLY = new EmptyConstant(); private EmptyConstant() {} public T accept(ASTVisitor v) { return v.forEmptyConstant(this); } public String toString() { return "null"; } } /** Jam primitive function Class */ class PrimFun implements Token, Term { private String name; PrimFun(String n) { name = n; } public String getName() { return name; } public T accept(ASTVisitor v) { return v.forPrimFun(this); } public String toString() { return name; } } /** Jam variable class. Part of Term and Token composite hierarchies. */ class Variable implements Token, Term { private String name; Variable(String n) { name = n; } public String getName() { return name; } public T accept(ASTVisitor v) { return v.forVariable(this); } public String toString() { return name; } } /** Jam operator class. Only a Token class. */ class Op implements Token { private String symbol; private boolean isUnOp; private boolean isBinOp; Op(String s, boolean iu, boolean ib) { symbol = s; isUnOp = iu; isBinOp = ib; } Op(String s) { // isBinOp only! this(s,false,true); } public String getSymbol() { return symbol; } public boolean isUnOp() { return isUnOp; } public boolean isBinOp() { return isBinOp; } public String toString() { return symbol; } } class KeyWord implements Token { private String name; KeyWord(String n) { name = n; } public String getName() { return name; } public String toString() { return name; } } /** Jam left paren token */ class LeftParen implements Token { public String toString() { return "("; } private LeftParen() {} public static final LeftParen ONLY = new LeftParen(); } /** Jam right paren token */ class RightParen implements Token { public String toString() { return ")"; } private RightParen() {} public static final RightParen ONLY = new RightParen(); } /** Jam left bracket token */ class LeftBrack implements Token { public String toString() { return "["; } private LeftBrack() {} public static final LeftBrack ONLY = new LeftBrack(); } /** Jam right bracket token */ class RightBrack implements Token { public String toString() { return "]"; } private RightBrack() {} public static final RightBrack ONLY = new RightBrack(); } /** Jam left brace token */ class LeftBrace implements Token { public String toString() { return "{"; } private LeftBrace() {} public static final LeftBrace ONLY = new LeftBrace(); } /** Jam right brace token */ class RightBrace implements Token { public String toString() { return "}"; } private RightBrace() {} public static final RightBrace ONLY = new RightBrace(); } /** Jam comma token */ class Comma implements Token { public String toString() { return ","; } private Comma() {} public static final Comma ONLY = new Comma(); } /** Jam semi-colon token */ class SemiColon implements Token { public String toString() { return ";"; } private SemiColon() {} public static final SemiColon ONLY = new SemiColon(); } /* AST class definitions */ /** Jam unary operator application class */ class UnOpApp implements AST { private Op rator; private AST arg; UnOpApp(Op r, AST a) { rator = r; arg = a; } public Op getRator() { return rator; } public AST getArg() { return arg; } public T accept(ASTVisitor v) { return v.forUnOpApp(this); } public String toString() { return rator + " " + arg; } } /** Jam binary operator application class */ class BinOpApp implements AST { private Op rator; private AST arg1, arg2; BinOpApp(Op r, AST a1, AST a2) { rator = r; arg1 = a1; arg2 = a2; } public Op getRator() { return rator; } public AST getArg1() { return arg1; } public AST getArg2() { return arg2; } public T accept(ASTVisitor v) { return v.forBinOpApp(this); } public String toString() { return "(" + arg1 + " " + rator + " " + arg2 + ")"; } } /** Jam closure (lambda) class */ class MapLiteral implements AST { private Variable[] vars; private AST body; MapLiteral(Variable[] v, AST b) { vars = v; body = b; } public Variable[] getVars() { return vars; } public AST getBody() { return body; } public T accept(ASTVisitor v) { return v.forMapLiteral(this); } public String toString() { return "map " + ToString.toString(vars, ",") + " to " + body; } } /** Jam function application class (rator must evaluate to PrimFun or MapLiteral closure) */ class App implements AST { private AST rator; private AST[] args; App(AST r, AST[] a) { rator = r; args = a; } public AST getRator() { return rator; } public AST[] getArgs() { return args; } public T accept(ASTVisitor v) { return v.forApp(this); } public String toString() { if ((rator instanceof Variable) || (rator instanceof PrimFun)) return rator + "(" + ToString.toString(args,", ") + ")"; else return "(" + rator + ")(" + ToString.toString(args,", ") + ")"; } } /** Jam if expression class */ class If implements AST { private AST test, conseq, alt; If(AST t, AST c, AST a) { test = t; conseq = c; alt = a; } public AST getTest() { return test; } public AST getConseq() { return conseq; } public AST getAlt() { return alt; } public T accept(ASTVisitor v) { return v.forIf(this); } public String toString() { return "if " + test + " then " + conseq + " else " + alt ; } } /** Jam let expression class */ class Let implements AST { private Def[] defs; private AST body; Let(Def[] d, AST b) { defs = d; body = b; } public T accept(ASTVisitor v) { return v.forLet(this); } public Def[] getDefs() { return defs; } public AST getBody() { return body; } public String toString() { return "let " + ToString.toString(defs," ") + " in " + body; } } /** Jam definition class */ class Def { private Variable lhs; private AST rhs; Def(Variable l, AST r) { lhs = l; rhs = r; } public Variable lhs() { return lhs; } public AST rhs() { return rhs; } public String toString() { return lhs + " := " + rhs + ";"; // return new StringBuilder(lhs.toString()).append(" := ").append(rhs.toString()).append(";").toString(); } } /** String utility class */ class ToString { /** Prints array a with separator s between elements. This method does NOT accept a == null, since null * is NOT an array. */ public static String toString(Object[] a, String s) { StringBuilder result = new StringBuilder(); for (int i = 0; i < a.length; i++) { if (i > 0) result.append(s); Object elt = a[i]; String eltString = (elt instanceof Object[]) ? toString((Object[]) elt, s) : elt.toString(); result.append(eltString); } return result.toString(); } } /** Class that represented parsing errors. */ class ParseException extends RuntimeException { ParseException(String s) { super(s); } } /** Jam lexer class. * Given a Lexer object, the next token in that input stream being * processed by the Lexer is returned by static method readToken(); it * throws a ParseException (a form of RuntimeException) if it * encounters a syntax error. Calling readToken() advances the cursor * in the input stream to the next token. * * The static method peek() in the Lexer class has the same behavior as * readToken() except for the fact that it does not advance the cursor. */ class Lexer extends StreamTokenizer { /* short names for StreamTokenizer codes */ public static final int WORD = StreamTokenizer.TT_WORD; public static final int NUMBER = StreamTokenizer.TT_NUMBER; public static final int EOF = StreamTokenizer.TT_EOF; public static final int EOL = StreamTokenizer.TT_EOL; /* operator Tokens */ // ::= | ~ | ! // ::= | "*" | / | = | != | < | > | <= | >= | & | "|" | // <- // ::= "+" | - // Note: there is no class distinction between and at // lexical level because of ambiguity; belongs to both public static final Op PLUS = new Op("+", true, true); public static final Op MINUS = new Op("-", true, true); public static final Op TIMES = new Op("*"); public static final Op DIVIDE = new Op("/"); public static final Op EQUALS = new Op("="); public static final Op NOT_EQUALS = new Op("!="); public static final Op LESS_THAN = new Op("<"); public static final Op GREATER_THAN = new Op(">"); public static final Op LESS_THAN_EQUALS = new Op("<="); public static final Op GREATER_THAN_EQUALS = new Op(">="); public static final Op NOT = new Op("~", true, false); public static final Op AND = new Op("&"); public static final Op OR = new Op("|"); /* Used to support reference cells. */ // public static final Op BANG = new Op("!", true, false); // public static final Op GETS = new Op("<-"); // public static final Op REF = new Op("ref", true, false); /* Keywords */ public static final KeyWord IF = new KeyWord("if"); public static final KeyWord THEN = new KeyWord("then"); public static final KeyWord ELSE = new KeyWord("else"); public static final KeyWord LET = new KeyWord("let"); // public static final KeyWord LETREC = new KeyWord("letrec"); // Used to support letrec extension public static final KeyWord IN = new KeyWord("in"); public static final KeyWord MAP = new KeyWord("map"); public static final KeyWord TO = new KeyWord("to"); public static final KeyWord BIND = new KeyWord(":="); // wordtable for classifying words in token stream public HashMap wordTable = new HashMap(); // Lexer peek cannot be implemented using StreamTokenizer pushBack // because some Tokens are composed of two StreamTokenizer tokens Token buffer; // holds token for peek() operation /* constructors */ /** Constructs a Lexer for the specified inputStream */ Lexer(Reader inputStream) { super(new BufferedReader(inputStream)); initLexer(); } /** Constructs a Lexer for the contents of the specified file */ Lexer(String fileName) throws IOException { this(new FileReader(fileName)); } /** Constructs a Lexer for the default console input stream System.in */ Lexer() { super(new BufferedReader(new InputStreamReader(System.in))); initLexer(); } /* Initializes lexer tables and the StreamTokenizer that the lexer extends */ private void initLexer() { // configure StreamTokenizer portion of this resetSyntax(); parseNumbers(); ordinaryChar('-'); slashSlashComments(true); wordChars('0', '9'); wordChars('a', 'z'); wordChars('A', 'Z'); wordChars('_', '_'); wordChars('?', '?'); whitespaceChars(0, ' '); // `+' `-' `*' `/' `~' `=' `<' `>' `&' `|' `:' `;' `,' '!' // `(' `)' `[' `]' are ordinary characters (self-delimiting) initWordTable(); buffer = null; // buffer initially empty } /** Reads tokens until next end-of-line */ public void flush() throws IOException { eolIsSignificant(true); while (nextToken() != EOL) ; // eat tokens until EOL eolIsSignificant(false); } /** Returns the next token in the input stream without consuming it */ public Token peek() { if (buffer == null) buffer = readToken(); return buffer; } /** Reads the next token as defined by StreamTokenizer in the input stream (consuming it). */ private int getToken() { // synonymous with nextToken() except for throwing an unchecked // ParseException instead of a checked IOException try { int tokenType = nextToken(); return tokenType; } catch(IOException e) { throw new ParseException("IOException " + e + "thrown by nextToken()"); } } /** Reads the next Token in the input stream (consuming it) */ public Token readToken() { /* Uses getToken() to read next token and constructs the Token object representing that token. * NOTE: token representations for all Token classes except IntConstant are unique; a HashMap * is used to avoid duplication. Hence, == can safely be used to compare all Tokens except * IntConstants for equality (assuming that code does not gratuitously create Tokens). */ if (buffer != null) { Token token = buffer; buffer = null; // clear buffer return token; } int tokenType = getToken(); switch (tokenType) { case NUMBER: int value = (int) nval; if (nval == (double) value) return new IntConstant(value); throw new ParseException("The number " + nval + " is not a 32 bit integer"); case WORD: Token regToken = wordTable.get(sval); if (regToken == null) { // must be new variable name Variable newVar = new Variable(sval); wordTable.put(sval, newVar); return newVar; } return regToken; case EOF: return null; case '(': return LeftParen.ONLY; case ')': return RightParen.ONLY; case '[': return LeftBrack.ONLY; case ']': return RightBrack.ONLY; // case '{': return LeftBrace.ONLY; // case '}': return RightBrace.ONLY; case ',': return Comma.ONLY; case ';': return SemiColon.ONLY; case '+': return PLUS; case '-': return MINUS; case '*': return TIMES; case '/': return DIVIDE; case '~': return NOT; case '=': return EQUALS; case '<': tokenType = getToken(); if (tokenType == '=') return LESS_THAN_EQUALS; // if (tokenType == '-') return GETS; // Used to support reference cells pushBack(); return LESS_THAN; case '>': tokenType = getToken(); if (tokenType == '=') return GREATER_THAN_EQUALS; pushBack(); return GREATER_THAN; case '!': tokenType = getToken(); if (tokenType == '=') return NOT_EQUALS; else throw new ParseException("!" + ((char) tokenType) + " is not a legal token"); /* this else clause supports reference cells */ // pushBack(); // return wordTable.get("!"); case '&': return AND; case '|': return OR; case ':': { tokenType = getToken(); if (tokenType == '=') return wordTable.get(":="); // ":=" is a keyword not an operator pushBack(); throw new ParseException("`:' is not a legalken"); } default: throw new ParseException("`" + ((char) tokenType) + "' is not a legal token"); } } /** Initializes the table of Strings used to recognize Tokens */ private void initWordTable() { // initialize wordTable // constants // ::= empty // ::= true | false wordTable.put("empty", EmptyConstant.ONLY); wordTable.put("true", BoolConstant.TRUE); wordTable.put("false", BoolConstant.FALSE); // Install keywords wordTable.put("if", IF); wordTable.put("then", THEN); wordTable.put("else", ELSE); wordTable.put("let", LET); wordTable.put("in", IN); wordTable.put("map", MAP); wordTable.put("to", TO); wordTable.put(":=", BIND); // Install primitive functions // ::= number? | function? | list? | empty? // | cons? | cons | first | rest | arity wordTable.put("number?", new PrimFun("number?")); wordTable.put("function?", new PrimFun("function?")); // wordTable.put("ref?", new PrimFun("ref?")); // used to support Jam references wordTable.put("list?", new PrimFun("list?")); wordTable.put("empty?", new PrimFun("empty?")); wordTable.put("cons?", new PrimFun("cons?")); wordTable.put("arity", new PrimFun("arity")); wordTable.put("cons", new PrimFun("cons")); wordTable.put("first", new PrimFun("first")); wordTable.put("rest", new PrimFun("rest")); } }