218 lines
6.5 KiB
Haskell
218 lines
6.5 KiB
Haskell
module Ivo.Syntax.Parser
|
|
( ParseError, parse
|
|
, DeclOrExprAST, ProgramAST
|
|
, parseAST, parseDeclOrExpr, parseProgram
|
|
, astParser, declOrExprParser, programParser
|
|
) where
|
|
|
|
import Ivo.Syntax.Base
|
|
|
|
import Data.List.NonEmpty (fromList)
|
|
import Data.Text qualified as T
|
|
import Prelude hiding (succ, either)
|
|
import Text.Parsec hiding (label, token, spaces)
|
|
import Text.Parsec qualified
|
|
import Text.Parsec.Text (Parser)
|
|
|
|
spaces :: Parser ()
|
|
spaces = Text.Parsec.spaces >> optional (try (comment >> spaces))
|
|
where
|
|
comment, lineComment, blockComment :: Parser ()
|
|
comment = blockComment <|> lineComment
|
|
lineComment = label "line comment" $ do
|
|
_ <- try (string "//")
|
|
_ <- many1 (noneOf "\n")
|
|
pure ()
|
|
blockComment = label "block comment" $ do
|
|
_ <- try (string "/*")
|
|
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
|
|
_ <- string "*/"
|
|
pure ()
|
|
|
|
label :: String -> Parser a -> Parser a
|
|
label = flip Text.Parsec.label
|
|
|
|
token :: Char -> Parser ()
|
|
token ch = label [ch] $ char ch *> spaces
|
|
|
|
keywords :: [Text]
|
|
keywords = ["let", "in", "Left", "Right", "S", "Z", "Char"]
|
|
|
|
-- | A keyword is an exact string which is not part of an identifier.
|
|
keyword :: Text -> Parser ()
|
|
keyword kwd = label (T.unpack kwd) $ do
|
|
try do
|
|
_ <- string $ T.unpack kwd
|
|
notFollowedBy letter
|
|
spaces
|
|
|
|
-- | An identifier is a sequence of letters which is not a keyword.
|
|
identifier :: Parser Text
|
|
identifier = label "identifier" $ do
|
|
notFollowedBy anyKeyword
|
|
T.pack <$> (many1 letter <* spaces)
|
|
where anyKeyword = choice $ map keyword keywords
|
|
|
|
variable :: Parser AST
|
|
variable = label "variable" $ Var <$> identifier
|
|
|
|
many1' :: Parser a -> Parser (NonEmpty a)
|
|
many1' p = fromList <$> many1 p
|
|
|
|
many2 :: Parser a -> Parser (a, NonEmpty a)
|
|
many2 p = (,) <$> p <*> many1' p
|
|
|
|
grouping :: Parser AST
|
|
grouping = label "grouping" $ between (token '(') (token ')') ambiguous
|
|
|
|
application :: Parser AST
|
|
application = uncurry App <$> many2 block
|
|
|
|
abstraction :: Parser AST
|
|
abstraction = label "lambda abstraction" $ Abs <$> between lambda (token '.') (many1' identifier) <*> ambiguous
|
|
where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces
|
|
|
|
definition :: Parser (Def Parse)
|
|
definition = do
|
|
name <- identifier
|
|
token '='
|
|
value <- ambiguous
|
|
pure (name, value)
|
|
|
|
let_ :: Parser AST
|
|
let_ = letrecstar <|> letstar
|
|
where
|
|
letrecstar = LetRecP <$> between (try (keyword "letrec")) (keyword "in") definition <*> ambiguous
|
|
letstar = Let <$> between (keyword "let") (keyword "in") definitions <*> ambiguous
|
|
|
|
definitions :: Parser (NonEmpty (Def Parse))
|
|
definitions = fromList <$> sepBy1 definition (token ';')
|
|
|
|
ctr :: Parser AST
|
|
ctr = pair <|> unit <|> either <|> nat <|> list <|> str
|
|
where
|
|
unit, pairCtr, tuple, either, left, right,
|
|
zero, succ, natLit, consCtr, cons, charCtr, charLit, strLit :: Parser AST
|
|
unit = Ctr CUnit [] <$ keyword "()"
|
|
pair = pairCtr <|> tuple
|
|
pairCtr = Ctr CPair [] <$ keyword "(,)"
|
|
tuple = try $ between (token '(') (token ')') do
|
|
e1 <- ambiguous
|
|
token ','
|
|
e2 <- ambiguous
|
|
pure $ Ctr CPair [e1, e2]
|
|
either = left <|> right
|
|
left = Ctr CLeft [] <$ keyword "Left"
|
|
right = Ctr CRight [] <$ keyword "Right"
|
|
nat = zero <|> succ <|> natLit
|
|
zero = Ctr CZero [] <$ keyword "Z"
|
|
succ = Ctr CSucc [] <$ keyword "S"
|
|
natLit = (PNat . read <$> many1 digit) <* spaces
|
|
list = cons <|> consCtr <|> listLit
|
|
consCtr = Ctr CCons [] <$ keyword "(::)"
|
|
cons = try $ between (token '(') (token ')') do
|
|
e1 <- ambiguous
|
|
keyword "::"
|
|
e2 <- ambiguous
|
|
pure $ Ctr CCons [e1, e2]
|
|
listLit = fmap PList $ between (token '[') (token ']') $ sepEndBy ambiguous (token ',')
|
|
str = charCtr <|> charLit <|> strLit
|
|
charCtr = Ctr CChar [] <$ keyword "Char"
|
|
charLit = fmap PChar $ char '\'' *> anyChar <* spaces
|
|
strLit = fmap (PStr . T.pack) $ between (token '"') (token '"') $ many (noneOf "\"")
|
|
|
|
pat :: Parser (Pat Parse)
|
|
pat = label "case alternate" $ do
|
|
(c, ns) <- label "pattern" $
|
|
pair <|> unit <|> left <|> right <|> zero <|> succ <|> nil <|> cons <|> char'
|
|
keyword "->"
|
|
e <- ambiguous
|
|
pure $ Pat c ns e
|
|
where
|
|
pair = try $ between (token '(') (token ')') do
|
|
e1 <- identifier
|
|
token ','
|
|
e2 <- identifier
|
|
pure (CPair, [e1, e2])
|
|
unit = (CUnit, []) <$ keyword "()"
|
|
left = do
|
|
keyword "Left"
|
|
e <- identifier
|
|
pure (CLeft, [e])
|
|
right = do
|
|
keyword "Right"
|
|
e <- identifier
|
|
pure (CRight, [e])
|
|
zero = (CZero, []) <$ keyword "Z"
|
|
succ = do
|
|
keyword "S"
|
|
e <- identifier
|
|
pure (CSucc, [e])
|
|
nil = (CNil, []) <$ keyword "[]"
|
|
cons = try $ between (token '(') (token ')') do
|
|
e1 <- identifier
|
|
keyword "::"
|
|
e2 <- identifier
|
|
pure (CCons, [e1, e2])
|
|
char' = do
|
|
keyword "Char"
|
|
e <- identifier
|
|
pure (CChar, [e])
|
|
|
|
case_ :: Parser AST
|
|
case_ = label "case patterns" $ do
|
|
token '{'
|
|
pats <- sepEndBy pat (token ';')
|
|
token '}'
|
|
pure $ Case pats
|
|
|
|
hole :: Parser AST
|
|
hole = label "hole" $ HoleP <$ token '_'
|
|
|
|
-- | Guaranteed to consume a finite amount of input
|
|
finite :: Parser AST
|
|
finite = label "finite expression" $ variable <|> hole <|> ctr <|> case_ <|> grouping
|
|
|
|
-- | Guaranteed to consume input, but may continue until it reaches a terminator
|
|
block :: Parser AST
|
|
block = label "block expression" $ abstraction <|> let_ <|> finite
|
|
|
|
-- | Not guaranteed to consume input at all, may continue until it reaches a terminator
|
|
ambiguous :: Parser AST
|
|
ambiguous = label "any expression" $ try application <|> block
|
|
|
|
astParser :: Parser AST
|
|
astParser = ambiguous
|
|
|
|
parseAST :: Text -> Either ParseError AST
|
|
parseAST = parse (spaces *> ambiguous <* eof) "input"
|
|
|
|
type Declaration = (Text, AST)
|
|
|
|
declaration :: Parser Declaration
|
|
declaration = notFollowedBy (try let_) >> (declrec <|> decl)
|
|
where
|
|
declrec = do
|
|
try $ keyword "letrec"
|
|
(name, expr) <- definition
|
|
pure (name, LetRecP (name, expr) (Var name))
|
|
decl = do
|
|
keyword "let"
|
|
definition
|
|
|
|
-- | A program is a series of declarations and expressions to execute.
|
|
type ProgramAST = [DeclOrExprAST]
|
|
type DeclOrExprAST = Either Declaration AST
|
|
|
|
declOrExprParser :: Parser DeclOrExprAST
|
|
declOrExprParser = try (Left <$> declaration) <|> (Right <$> ambiguous)
|
|
|
|
programParser :: Parser ProgramAST
|
|
programParser = spaces *> sepEndBy declOrExprParser (token ';') <* eof
|
|
|
|
parseDeclOrExpr :: Text -> Either ParseError DeclOrExprAST
|
|
parseDeclOrExpr = parse declOrExprParser "input"
|
|
|
|
parseProgram :: Text -> Either ParseError ProgramAST
|
|
parseProgram = parse programParser "input"
|