
-- ***************************************************************************
-- *                                                                         *
-- *                                                                         *
-- *                                 Lexer.hs                                *
-- *                                                                         *
-- *                           Revision 1997-08-19                           *
-- *                                                                         *
-- * Modified, CCF: Jan 16, 2001                                             *
-- *                                                                         *
-- ***************************************************************************

module Lexer(Token(..), TokWord(..), TokList(..), lexer) where

import Char

{- ***************************************************************************

This module converts a string to a token list.

lexer :: String -> TokList


TokList is a list of pairs containing the matched string and the token. 

The following types are defined:

  *  Num: Integers

  *  Key: Reserved keywords, defined in "keywords"

  *  Symbol: A string of symbols from "symbols" and/or "special".

  *  Id: A alphanummeric identifier ('_' allowed)

  *  Cons: A constructor: Id followed by a '/' and a number

  *  Destr: A destructor: Number followed by {"st", "nd", "rd", "th"}

   ***************************************************************************

"keywords" is a list of reserved words.


-}

-- **** Keywords follow. ****
keywords :: [String]
keywords = ["if", "then", "else", "let", "in"] 

{- ***************************************************************************

"symbols" and "special" contains the set of symbols that will be represented 
by the symbol token.

A string of "symbols" will be interpreted as a single token.

A string of "special" will be interpreted as a sequence of tokens, each 
representing a single symbol.

-}

symbols, special :: [Char]

symbols = []

special = ['(', ')', '=', ',']

-- ***************************************************************************

data Token = Num | Id | Key | Cons | Destr | Symbol deriving (Show, Read, Eq)

type TokWord = (String, Token)

type TokList = [ TokWord ]

-- ***************************************************************************

destrNames = ["st", "nd", "rd", "th"]

lexer :: String -> TokList
lexer [] = []
lexer ('-':cs) = case cs of
                   []       -> lexer cs
                   (c2:c2s) -> lexer (dropWhile (/='\n') c2s) -- Comments
lexer (c:cs) | isSpace c = lexer (dropWhile isSpace cs)
             | isDigit c = let (fs,rs) = span isDigit cs
	                   in if (beginsWith rs isAlphaNum)
                              then 
                                let (fss,rss) = span isAlphaNum rs
                                in if fss `elem` destrNames
                                   then (c:(fs ++ fss), Destr) : lexer rss
                                   else error ("Lexer: Invalid destructor suffix \'"++fss++"\' in context \""++c : take 10 cs++"\"\n")
                             else (c:fs, Num) : lexer rs
	     | isSpec c  = ([c], Symbol) : lexer cs
             | isSym c   = let (fs,rs) = span isSym cs
	                   in (c:fs, Symbol) : lexer rs
             | isUcase c = let (fs,rs) = span isId cs
                               xs = c:fs
                           in if beginsWith rs isSlash
                             then
                               let (fss,rss)=span isDigit 
                                                  (stripCond rs isSlash)
                               in (xs, Cons) : lexer rss
                             else (xs, Cons) : lexer rs
	     | isId c    = let (fs,rs) = span isId cs
	                       xs = c:fs
		  	   in if xs `elem` keywords
			      then (xs, Key) : lexer rs
			      else (xs, Id) : lexer rs
	     | otherwise = error ("Lexer: Unknown symbol \'"++[c]++"\' in context \""++c : take 10 cs++"\"\n")

stripCond :: String -> (Char -> Bool) -> String
stripCond [] f = []
stripCond (c:cs) f = if f c then cs else c:cs

beginsWith :: String -> (Char -> Bool) -> Bool
beginsWith [] f = False
beginsWith (c:cs) f = f c

isSpec :: Char -> Bool
isSpec c = c `elem` special

isSym :: Char -> Bool
isSym c = c `elem` symbols

isId :: Char -> Bool
isId c = isAlphaNum c || c=='_'

isUcase :: Char -> Bool
isUcase c = c `elem` ['A'..'Z']

isSlash :: Char->Bool
isSlash c = c=='/'


