
-- ***************************************************************************
-- *                                                                         *
-- *                                                                         *
-- *                               ParseLib.hs                               *
-- *                                                                         *
-- *                           Revision 1997-08-15                           *
-- *                                                                         *
-- *                      Translated swedish -> english                      *
-- *                               2001-03-02                                *
-- *                                                                         *
-- ***************************************************************************
-- *                                                                         *
-- * This library corresponds to the one described in Jeroen Fokkers article *
-- *                                                                         *
-- ***************************************************************************

module ParseLib where

import Lexer

infixl 1 `thenP`, `leftP`, `rightP`, `apP`, `chkP`
infixl 0 `mapP`, `elseP`

-- ***************************************************************************
-- *                                                                         *
-- *                               Parser type                               *
-- *                                                                         *
-- ***************************************************************************
-- *                                                                         *
-- * A datatype is used since this simplifies the functions signatures       *
-- * somewhat.                                                               *
-- *                                                                         *
-- ***************************************************************************

data Parser symbol result = P ( [symbol] -> [([symbol], result)] )

unP :: Parser a b -> [a] -> [([a], b)]
unP (P p) = p

-- ***************************************************************************
-- *                                                                         *
-- * success is a parser that always succeeds.                               *
-- *                                                                         *
-- ***************************************************************************

success :: b -> Parser a b
success r = P (\xs -> [(xs, r)])

-- ***************************************************************************
-- *                                                                         *
-- * failure is a parser that always fails.                                  *
-- *                                                                         *
-- ***************************************************************************

failure :: Parser a b
failure = P (\_ -> [])

-- ***************************************************************************
-- *                                                                         *
-- *                            Primitive parsers                            *
-- *                                                                         *
-- ***************************************************************************

-- ***************************************************************************
-- *                                                                         *
-- * match tests the first symbol against a predicate. If True is returned,  *
-- * the parser succeeds. The symbols s corresponds to match (==s)           *
-- *                                                                         *
-- ***************************************************************************

match :: (a->Bool) -> Parser a a
match pred = P p
             where p (x:xs) | pred x = [(xs, x)]
	           p _               = []

symbol :: Eq a => a -> Parser a a
symbol s = match (== s)

-- ***************************************************************************
-- *                                                                         *
-- *                          Parser combinators                             *
-- *                                                                         *
-- ***************************************************************************

-- ***************************************************************************
-- *                                                                         *
-- * thenP:                                                                  *
-- * p `thenP` q means: First parse with p. If p succeeds the rest of the    *
-- * in data is parsed with q. If both parsers succeed their results are     *
-- * returned as a pair.                                                     *
-- *                                                                         *
-- * elseP:                                                                  *
-- * p `elseP` q means: First parse with p. If p succeeds return the result. *
-- * Otherwise if p fails, the parser q is used instead.                     *
-- *                                                                         *
-- * leftP works like thenP but throws away the right result. Note the right *
-- * parser is still called.                                                 *
-- *                                                                         *
-- * rightP works like leftP, but the left result is thrown away insead of   * 
-- * the right.                                                              *
-- *                                                                         *
-- ***************************************************************************

thenP :: Parser a b -> Parser a c -> Parser a (b,c)
thenP p q = P r
	    where r xs = [ (zs,(y,z)) | (ys,y) <- unP p xs, (zs,z) <- unP q ys ]

elseP :: Parser a b -> Parser a b -> Parser a b
elseP p q = P r
            where r xs = unP p xs ++ unP q xs

leftP :: Parser a b -> Parser a c -> Parser a b
leftP p q = P r
	    where r xs = [ (zs,y) | (ys,y) <- unP p xs, (zs,z) <- unP q ys ]


rightP :: Parser a b -> Parser a c -> Parser a c
rightP p q = P r
	    where r xs = [ (zs,z) | (ys,y) <- unP p xs, (zs,z) <- unP q ys ]



-- ***************************************************************************
-- *                                                                         *
-- * mapP is used to change the result type of a parser. f `mapP` p means:   *
-- * parse with p and apply f to the result. The result of the application   *
-- * is returned as the result of the parser.
-- *                                                                         *
-- ***************************************************************************

mapP :: (b -> c) -> Parser a b -> Parser a c
mapP f p = P r
	   where r xs = [ (ys, f y) | (ys,y) <- unP p xs ]

-- ***************************************************************************
-- *                                                                         *
-- * apP works almost like mapP.
-- * apP fungerar nstan som mapP. Det som skiljer r att den fr funktionen *
-- * som resultatet av en parsning. pf `apP` p tar ut funktionen (f) ut ur   *
-- * pf, den parsar med p och som resultat ger den f applicerat p           *
-- * resultatet av p.                                                        *
-- *                                                                         *
-- * x `chkP` p is used to throw away the result of p. The parser is called  *
-- * but the result is replaced by x.                                        *
-- *                                                                         *
-- * Note: apP and chkP can be used as replacements for thenP, leftP and     *
-- * rightP. 
-- * Notera: apP och chkP kan anvndas som ersttning fr thenP, leftP och   *
-- * rightP. Pss. slipper man skriva de specialfunktioner som tar tuplerna   *
-- * som skapas och ger dem som argument till t.ex. konstruerare.            *
-- *                                                                         *
-- * Ex: Antag att vi parsar regeln <expr> '+' <expr>. Fr <expr> anvnder   *
-- * vi parsern pExpr som har typen Parser String Expr och fr '+' anvnder  *
-- * vi parsern pPlus, med typen Parser String (). Detta vill vi ge till     *
-- * konstrueraren PlusExpr som har typen Expr -> Expr -> Expr. Fr att f   *
-- * bort () frn pPlus anvnder vi oss av leftP:                            *
-- *                                                                         *
-- * (uncurry PlusExpr) `mapP` (pExpr `leftP` pPlus `thenP` pExpr)           *
-- *                                                                         *
-- * Om vi i stllet anvnder oss enbart av apP och chkP fs:                *
-- *                                                                         *
-- * return PlusExpr `apP` pExpr `chkP` pPlus `apP` pExpr                    *
-- *                                                                         *
-- ***************************************************************************

apP :: Parser a (b -> c) -> Parser a b -> Parser a c
apP pf p = P r
	   where r xs = [ (zs, f z) | (ys,f) <- unP pf xs, (zs,z) <- unP p ys ]

chkP :: Parser a c -> Parser a b -> Parser a c
chkP px p = P r
	    where r xs = [ (zs, f) | (ys,f) <- unP px xs, (zs,z) <- unP p ys ]

-- data Expr = PlusExpr Expr Expr

-- pExpr :: Parser String Expr
-- pExpr = error "42"

-- pPlus  :: Parser String ()
-- pPlus = error "42"

-- ***************************************************************************
-- *                                                                         *
-- * Ibland ger en parser flera resultat. (Vi har ju en lista av par.) Om    *
-- * man vet att endast det frsta resultatet r det man eg. vill ha, r det *
-- * ondigt att slpa p de, oftast felaktiga, resultaten. cut klipper bort *
-- * alla utom det frsta resultatet. OBS! Det kan vara farligt att anvnda  *
-- * cut om inte det resultat vi vill ha ligger frst.                       *
-- *                                                                         *
-- ***************************************************************************

cutP :: Parser a b -> Parser a b
cutP p = P q
         where q xs = cut (unP p xs)
               cut [] = []
               cut (x:_) = [x]

-- ***************************************************************************
-- *                                                                         *
-- * manyP och someP anvnds nr man vill parsa flera likartade element      *
-- * efter varandra. De tar en parser av typen Parser a b och skapara en     *
-- * Parser a [b], dvs en parser som parsar flera strngar och bygger en     *
-- * lista av resultaten. manyP fungerar fr noll eller flera upprepningar,  *
-- * someP krver att minst en, den frsta, kan parsas.                      *
-- *                                                                         *
-- ***************************************************************************

manyP :: Parser a b -> Parser a [b]
manyP p = (list `mapP` (p `thenP` manyP p)) `elseP` success []
          where list (x,xs) = x:xs

someP :: Parser a b -> Parser a [b]
someP p = list `mapP` (p `thenP` manyP p)
          where list (x,xs) = x:xs

-- ***************************************************************************
-- *                                                                         *
-- *                                TokParser                                *
-- *                                                                         *
-- ***************************************************************************
-- *                                                                         *
-- * Hr finns det parsrar som arbetar p tokenstrmmar dvs                  *
-- * [(String, Token)]                                                       *
-- *                                                                         *
-- * number matchar ett heltal i strmmen. ident matchar identifierare.      *
-- * key k lyckas om frsta toknet r ett reserverat ord k. symb s lyckas om *
-- * 1:a toknet r symbolen s.                                               *
-- *                                                                         *
-- ***************************************************************************

type TokParser a = Parser TokWord a

number :: TokParser Int
number = (read.fst) `mapP` match ((==Num).snd)

ident :: TokParser String
ident = fst `mapP` match ((==Id).snd)

key :: String -> TokParser String
key k = fst `mapP` match (==(k,Key))

symb :: String -> TokParser String
symb s = fst `mapP` match (==(s,Symbol))

eofP p = P q
         where q xs = filter (null.fst) (unP p xs)

-- ***************************************************************************
-- *                                                                         *
-- * parse tar en parser och en indatastrm, och om parsningen lyckas        *
-- * returneras den 1:a parsningen, om den misslyckas genereras ett          *
-- * programfel.                                                             *
-- *                                                                         *
-- ***************************************************************************

parser :: Parser a b -> [a] -> b
parser p xs = case unP (eofP p) xs of
               []  -> error "Syntax Error!!\n"
	       (_, b):_ -> b
