ivo/src/Ivo/Syntax/Parser.hs

218 lines
6.5 KiB
Haskell

module Ivo.Syntax.Parser
( ParseError, parse
, DeclOrExprAST, ProgramAST
, parseAST, parseDeclOrExpr, parseProgram
, astParser, declOrExprParser, programParser
) where
import Ivo.Syntax.Base
import Data.List.NonEmpty (fromList)
import Data.Text qualified as T
import Prelude hiding (succ, either)
import Text.Parsec hiding (label, token, spaces)
import Text.Parsec qualified
import Text.Parsec.Text (Parser)
spaces :: Parser ()
spaces = Text.Parsec.spaces >> optional (try (comment >> spaces))
where
comment, lineComment, blockComment :: Parser ()
comment = blockComment <|> lineComment
lineComment = label "line comment" $ do
_ <- try (string "//")
_ <- many1 (noneOf "\n")
pure ()
blockComment = label "block comment" $ do
_ <- try (string "/*")
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
_ <- string "*/"
pure ()
label :: String -> Parser a -> Parser a
label = flip Text.Parsec.label
token :: Char -> Parser ()
token ch = label [ch] $ char ch *> spaces
keywords :: [Text]
keywords = ["let", "in", "Left", "Right", "S", "Z", "Char"]
-- | A keyword is an exact string which is not part of an identifier.
keyword :: Text -> Parser ()
keyword kwd = label (T.unpack kwd) $ do
try do
_ <- string $ T.unpack kwd
notFollowedBy letter
spaces
-- | An identifier is a sequence of letters which is not a keyword.
identifier :: Parser Text
identifier = label "identifier" $ do
notFollowedBy anyKeyword
T.pack <$> (many1 letter <* spaces)
where anyKeyword = choice $ map keyword keywords
variable :: Parser AST
variable = label "variable" $ Var <$> identifier
many1' :: Parser a -> Parser (NonEmpty a)
many1' p = fromList <$> many1 p
many2 :: Parser a -> Parser (a, NonEmpty a)
many2 p = (,) <$> p <*> many1' p
grouping :: Parser AST
grouping = label "grouping" $ between (token '(') (token ')') ambiguous
application :: Parser AST
application = uncurry App <$> many2 block
abstraction :: Parser AST
abstraction = label "lambda abstraction" $ Abs <$> between lambda (token '.') (many1' identifier) <*> ambiguous
where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces
definition :: Parser (Def Parse)
definition = do
name <- identifier
token '='
value <- ambiguous
pure (name, value)
let_ :: Parser AST
let_ = letrecstar <|> letstar
where
letrecstar = LetRecP <$> between (try (keyword "letrec")) (keyword "in") definition <*> ambiguous
letstar = Let <$> between (keyword "let") (keyword "in") definitions <*> ambiguous
definitions :: Parser (NonEmpty (Def Parse))
definitions = fromList <$> sepBy1 definition (token ';')
ctr :: Parser AST
ctr = pair <|> unit <|> either <|> nat <|> list <|> str
where
unit, pairCtr, tuple, either, left, right,
zero, succ, natLit, consCtr, cons, charCtr, charLit, strLit :: Parser AST
unit = Ctr CUnit [] <$ keyword "()"
pair = pairCtr <|> tuple
pairCtr = Ctr CPair [] <$ keyword "(,)"
tuple = try $ between (token '(') (token ')') do
e1 <- ambiguous
token ','
e2 <- ambiguous
pure $ Ctr CPair [e1, e2]
either = left <|> right
left = Ctr CLeft [] <$ keyword "Left"
right = Ctr CRight [] <$ keyword "Right"
nat = zero <|> succ <|> natLit
zero = Ctr CZero [] <$ keyword "Z"
succ = Ctr CSucc [] <$ keyword "S"
natLit = (PNat . read <$> many1 digit) <* spaces
list = cons <|> consCtr <|> listLit
consCtr = Ctr CCons [] <$ keyword "(::)"
cons = try $ between (token '(') (token ')') do
e1 <- ambiguous
keyword "::"
e2 <- ambiguous
pure $ Ctr CCons [e1, e2]
listLit = fmap PList $ between (token '[') (token ']') $ sepEndBy ambiguous (token ',')
str = charCtr <|> charLit <|> strLit
charCtr = Ctr CChar [] <$ keyword "Char"
charLit = fmap PChar $ char '\'' *> anyChar <* spaces
strLit = fmap (PStr . T.pack) $ between (token '"') (token '"') $ many (noneOf "\"")
pat :: Parser (Pat Parse)
pat = label "case alternate" $ do
(c, ns) <- label "pattern" $
pair <|> unit <|> left <|> right <|> zero <|> succ <|> nil <|> cons <|> char'
keyword "->"
e <- ambiguous
pure $ Pat c ns e
where
pair = try $ between (token '(') (token ')') do
e1 <- identifier
token ','
e2 <- identifier
pure (CPair, [e1, e2])
unit = (CUnit, []) <$ keyword "()"
left = do
keyword "Left"
e <- identifier
pure (CLeft, [e])
right = do
keyword "Right"
e <- identifier
pure (CRight, [e])
zero = (CZero, []) <$ keyword "Z"
succ = do
keyword "S"
e <- identifier
pure (CSucc, [e])
nil = (CNil, []) <$ keyword "[]"
cons = try $ between (token '(') (token ')') do
e1 <- identifier
keyword "::"
e2 <- identifier
pure (CCons, [e1, e2])
char' = do
keyword "Char"
e <- identifier
pure (CChar, [e])
case_ :: Parser AST
case_ = label "case patterns" $ do
token '{'
pats <- sepEndBy pat (token ';')
token '}'
pure $ Case pats
hole :: Parser AST
hole = label "hole" $ HoleP <$ token '_'
-- | Guaranteed to consume a finite amount of input
finite :: Parser AST
finite = label "finite expression" $ variable <|> hole <|> ctr <|> case_ <|> grouping
-- | Guaranteed to consume input, but may continue until it reaches a terminator
block :: Parser AST
block = label "block expression" $ abstraction <|> let_ <|> finite
-- | Not guaranteed to consume input at all, may continue until it reaches a terminator
ambiguous :: Parser AST
ambiguous = label "any expression" $ try application <|> block
astParser :: Parser AST
astParser = ambiguous
parseAST :: Text -> Either ParseError AST
parseAST = parse (spaces *> ambiguous <* eof) "input"
type Declaration = (Text, AST)
declaration :: Parser Declaration
declaration = notFollowedBy (try let_) >> (declrec <|> decl)
where
declrec = do
try $ keyword "letrec"
(name, expr) <- definition
pure (name, LetRecP (name, expr) (Var name))
decl = do
keyword "let"
definition
-- | A program is a series of declarations and expressions to execute.
type ProgramAST = [DeclOrExprAST]
type DeclOrExprAST = Either Declaration AST
declOrExprParser :: Parser DeclOrExprAST
declOrExprParser = try (Left <$> declaration) <|> (Right <$> ambiguous)
programParser :: Parser ProgramAST
programParser = spaces *> sepEndBy declOrExprParser (token ';') <* eof
parseDeclOrExpr :: Text -> Either ParseError DeclOrExprAST
parseDeclOrExpr = parse declOrExprParser "input"
parseProgram :: Text -> Either ParseError ProgramAST
parseProgram = parse programParser "input"