ivo/src/Ivo/Syntax/Parser.hs

306 lines
8.9 KiB
Haskell
Raw Normal View History

2021-03-26 12:31:55 -07:00
module Ivo.Syntax.Parser
( ParseError, parse
, Declaration, TopLevelAST, ProgramAST
, parseAST, parseTopLevel, parseProgram
, typeParser, schemeParser, astParser, topLevelParser, programParser
) where
2021-03-26 12:31:55 -07:00
import Ivo.Syntax.Base
2021-03-26 14:55:23 -07:00
import Data.Functor.Identity (Identity)
import Data.List.NonEmpty (fromList)
import Data.Text qualified as T
import Prelude hiding (succ, either)
import Text.Parsec hiding (label, token, spaces)
import Text.Parsec qualified
2021-03-26 14:55:23 -07:00
import Text.Parsec.Expr
import Text.Parsec.Text (Parser)
spaces :: Parser ()
spaces = Text.Parsec.spaces >> optional (try (comment >> spaces))
where
comment, lineComment, blockComment :: Parser ()
comment = blockComment <|> lineComment
lineComment = label "line comment" $ do
_ <- try (string "//")
_ <- many1 (noneOf "\n")
pure ()
blockComment = label "block comment" $ do
_ <- try (string "/*")
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
_ <- string "*/"
pure ()
label :: String -> Parser a -> Parser a
label = flip Text.Parsec.label
token :: Char -> Parser ()
token ch = label [ch] $ char ch *> spaces
keywords :: [Text]
2021-03-26 14:55:23 -07:00
keywords = ["let", "in", "Left", "Right", "S", "Z", "forall", "Char", "Void", "Unit", "Nat", "Char"]
-- | A keyword is an exact string which is not part of an identifier.
keyword :: Text -> Parser ()
keyword kwd = label (T.unpack kwd) $ do
try do
_ <- string $ T.unpack kwd
notFollowedBy letter
spaces
-- | An identifier is a sequence of letters which is not a keyword.
identifier :: Parser Text
identifier = label "identifier" $ do
notFollowedBy anyKeyword
T.pack <$> (many1 letter <* spaces)
where anyKeyword = choice $ map keyword keywords
variable :: Parser AST
variable = label "variable" $ Var <$> identifier
2021-03-26 14:55:23 -07:00
tvariable :: Parser Type
tvariable = label "variable" $ TVar <$> identifier
many1' :: Parser a -> Parser (NonEmpty a)
many1' p = fromList <$> many1 p
many2 :: Parser a -> Parser (a, NonEmpty a)
many2 p = (,) <$> p <*> many1' p
grouping :: Parser AST
grouping = label "grouping" $ between (token '(') (token ')') ambiguous
2021-03-26 14:55:23 -07:00
tgrouping :: Parser Type
tgrouping = label "grouping" $ between (token '(') (token ')') tambiguous
application :: Parser AST
2021-03-26 14:55:23 -07:00
application = label "application" $ uncurry App <$> many2 block
tapplication :: Parser Type
tapplication = label "application" $ uncurry tapp' <$> many2 tblock
where tapp' t1 (t2 :| ts) = tapp (t1 : t2 : ts)
abstraction :: Parser AST
abstraction = label "lambda abstraction" $ Abs <$> between lambda (token '.') (many1' identifier) <*> ambiguous
where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces
definition :: Parser (Def Parse)
2021-03-26 14:55:23 -07:00
definition = label "definition" $ do
name <- identifier
token '='
value <- ambiguous
pure (name, value)
let_ :: Parser AST
2021-03-26 14:55:23 -07:00
let_ = label "let expression" $ letrecstar <|> letstar
where
letrecstar = LetRecP <$> between (try (keyword "letrec")) (keyword "in") definition <*> ambiguous
letstar = Let <$> between (keyword "let") (keyword "in") definitions <*> ambiguous
definitions :: Parser (NonEmpty (Def Parse))
definitions = fromList <$> sepBy1 definition (token ';')
ctr :: Parser AST
2021-03-26 14:55:23 -07:00
ctr = label "data constructor" $ pair <|> unit <|> either <|> nat <|> list <|> str
where
unit, pairCtr, tuple, either, left, right,
zero, succ, natLit, consCtr, cons, charCtr, charLit, strLit :: Parser AST
unit = Ctr CUnit [] <$ keyword "()"
pair = pairCtr <|> tuple
pairCtr = Ctr CPair [] <$ keyword "(,)"
tuple = try $ between (token '(') (token ')') do
e1 <- ambiguous
token ','
e2 <- ambiguous
pure $ Ctr CPair [e1, e2]
either = left <|> right
left = Ctr CLeft [] <$ keyword "Left"
right = Ctr CRight [] <$ keyword "Right"
nat = zero <|> succ <|> natLit
zero = Ctr CZero [] <$ keyword "Z"
succ = Ctr CSucc [] <$ keyword "S"
natLit = (PNat . read <$> many1 digit) <* spaces
list = cons <|> consCtr <|> listLit
consCtr = Ctr CCons [] <$ keyword "(::)"
cons = try $ between (token '(') (token ')') do
e1 <- ambiguous
keyword "::"
e2 <- ambiguous
pure $ Ctr CCons [e1, e2]
listLit = fmap PList $ between (token '[') (token ']') $ sepEndBy ambiguous (token ',')
str = charCtr <|> charLit <|> strLit
charCtr = Ctr CChar [] <$ keyword "Char"
charLit = fmap PChar $ char '\'' *> anyChar <* spaces
strLit = fmap (PStr . T.pack) $ between (token '"') (token '"') $ many (noneOf "\"")
pat :: Parser (Pat Parse)
pat = label "case alternate" $ do
(c, ns) <- label "pattern" $
pair <|> unit <|> left <|> right <|> zero <|> succ <|> nil <|> cons <|> char'
keyword "->"
e <- ambiguous
pure $ Pat c ns e
where
pair = try $ between (token '(') (token ')') do
e1 <- identifier
token ','
e2 <- identifier
pure (CPair, [e1, e2])
unit = (CUnit, []) <$ keyword "()"
left = do
keyword "Left"
e <- identifier
pure (CLeft, [e])
right = do
keyword "Right"
e <- identifier
pure (CRight, [e])
zero = (CZero, []) <$ keyword "Z"
succ = do
keyword "S"
e <- identifier
pure (CSucc, [e])
nil = (CNil, []) <$ keyword "[]"
cons = try $ between (token '(') (token ')') do
e1 <- identifier
keyword "::"
e2 <- identifier
pure (CCons, [e1, e2])
char' = do
keyword "Char"
e <- identifier
pure (CChar, [e])
case_ :: Parser AST
case_ = label "case patterns" $ do
token '{'
pats <- sepEndBy pat (token ';')
token '}'
pure $ Case pats
2021-03-26 14:55:23 -07:00
ann :: Parser AST
ann = label "type annotation" $ do
e <- block
token ':'
t <- tambiguous
pure (Ann () e t)
hole :: Parser AST
hole = label "hole" $ HoleP <$ token '_'
2021-03-26 14:55:23 -07:00
tlist :: Parser Type
tlist = between (token '[') (token ']') $ ((TApp TList <$> tambiguous) <|> pure TList)
tinfix :: Parser Type
tinfix = buildExpressionParser ttable tblock
where
ttable :: [[Operator Text () Identity Type]]
ttable = [ [Infix (binop TAbs <$ arrSym) AssocRight]
, [Infix (binop TProd <$ token '*') AssocRight]
, [Infix (binop TSum <$ token '+') AssocRight]
]
arrSym :: Parser ()
arrSym = token '→' <|> keyword "->"
binop :: Type -> Type -> Type -> Type
binop c t1 t2 = TApp (TApp c t1) t2
tctr :: Parser Type
tctr = tlist <|> tunit <|> tvoid <|> tnat <|> tchar
where
tunit = TUnit <$ (keyword "Unit" <|> keyword "")
tvoid = TVoid <$ (keyword "Void" <|> keyword "")
tnat = TNat <$ (keyword "Nat" <|> keyword "")
tchar = TChar <$ keyword "Char"
tfinite :: Parser Type
tfinite = tvariable <|> tlist <|> tctr <|> tgrouping
tblock :: Parser Type
tblock = tfinite
tambiguous :: Parser Type
tambiguous = try tinfix <|> try tapplication <|> tblock
tforall :: Parser Scheme
tforall = do
keyword "forall" <|> token '∀'
names <- many1 (identifier <* spaces)
token '.'
ty <- tambiguous
pure $ TForall names ty
scheme :: Parser Scheme
scheme = tforall <|> (TForall [] <$> tambiguous)
-- | Guaranteed to consume a finite amount of input
finite :: Parser AST
finite = label "finite expression" $ variable <|> hole <|> ctr <|> case_ <|> grouping
-- | Guaranteed to consume input, but may continue until it reaches a terminator
block :: Parser AST
block = label "block expression" $ abstraction <|> let_ <|> finite
-- | Not guaranteed to consume input at all, may continue until it reaches a terminator
ambiguous :: Parser AST
2021-03-26 14:55:23 -07:00
ambiguous = label "any expression" $ try ann <|> try application <|> block
typeParser :: Parser Type
typeParser = tambiguous
schemeParser :: Parser Scheme
schemeParser = scheme
astParser :: Parser AST
astParser = ambiguous
parseAST :: Text -> Either ParseError AST
parseAST = parse (spaces *> ambiguous <* eof) "input"
2021-03-26 14:55:23 -07:00
type Declaration = (Text, Maybe Type, AST)
definitionAnn :: Parser Declaration
definitionAnn = do
name <- identifier
ty <- optionMaybe $ token ':' *> tambiguous
token '='
e <- ambiguous
pure (name, ty, e)
declaration :: Parser Declaration
declaration = notFollowedBy (try let_) >> (declrec <|> decl)
where
declrec = do
try $ keyword "letrec"
2021-03-26 14:55:23 -07:00
(name, ty, expr) <- definitionAnn
pure (name, ty, LetRecP (name, expr) (Var name))
decl = do
keyword "let"
2021-03-26 14:55:23 -07:00
definitionAnn
-- | A program is a series of declarations and expressions to execute.
type ProgramAST = [Declaration]
type TopLevelAST = [Either Declaration AST]
topLevel :: Parser (Either Declaration AST)
topLevel = try (Left <$> declaration) <|> (Right <$> ambiguous)
topLevelParser :: Parser TopLevelAST
topLevelParser = spaces *> sepEndBy topLevel (token ';') <* eof
shebang :: Parser ()
shebang = do
try $ keyword "#!"
skipMany (noneOf "\n")
spaces
programParser :: Parser ProgramAST
programParser = shebang *> sepEndBy declaration (token ';') <* eof
parseTopLevel :: Text -> Either ParseError TopLevelAST
parseTopLevel = parse topLevelParser "input"
parseProgram :: Text -> Either ParseError ProgramAST
parseProgram = parse programParser "input"