WIP on new syntax

new-syntax-wip
James T. Martin 2021-03-27 20:13:25 -07:00
parent b337ecb094
commit b53b1eb4d7
Signed by: james
GPG Key ID: 4B7F3DA9351E577C
4 changed files with 1610 additions and 495 deletions

View File

@ -41,6 +41,7 @@ default-extensions:
- PatternSynonyms
- ScopedTypeVariables
- StandaloneDeriving
- TupleSections
- TypeFamilies
- ViewPatterns

File diff suppressed because it is too large Load Diff

View File

@ -1,305 +1,604 @@
module Ivo.Syntax.Parser
( ParseError, parse
, Declaration, TopLevelAST, ProgramAST
, parseAST, parseTopLevel, parseProgram
, typeParser, schemeParser, astParser, topLevelParser, programParser
, programParser, replParser
) where
import Ivo.Syntax.Base
import Data.Functor.Identity (Identity)
import Data.List.NonEmpty (fromList)
import Data.Text qualified as T
import Prelude hiding (succ, either)
import Text.Parsec hiding (label, token, spaces)
import Text.Parsec hiding (label, token, spaces, tokens, anyToken)
import Text.Parsec qualified
import Text.Parsec.Expr
import Text.Parsec.Text (Parser)
spaces :: Parser ()
spaces = Text.Parsec.spaces >> optional (try (comment >> spaces))
where
comment, lineComment, blockComment :: Parser ()
comment = blockComment <|> lineComment
lineComment = label "line comment" $ do
_ <- try (string "//")
_ <- many1 (noneOf "\n")
pure ()
blockComment = label "block comment" $ do
_ <- try (string "/*")
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
_ <- string "*/"
pure ()
-- | Parse the contents of a file containing Ivo source code,
-- with an optional shebang.
--
-- See 'Scope'.
programParser :: Parser Scope
programParser = do
optional shebang
scope
label :: String -> Parser a -> Parser a
label = flip Text.Parsec.label
-- | Parse REPL input (which consists of top-level statements and expressions).
--
-- See 'TopLevel' and 'Expr'.
replParser :: Parser [Either TopLevel Expr]
replParser = sepEndBy ((Left <$> topLevel) <|> (Right <$> ambiguous)) (token ";")
token :: Char -> Parser ()
token ch = label [ch] $ char ch *> spaces
keywords :: [Text]
keywords = ["let", "in", "Left", "Right", "S", "Z", "forall", "Char", "Void", "Unit", "Nat", "Char"]
-- | A keyword is an exact string which is not part of an identifier.
keyword :: Text -> Parser ()
keyword kwd = label (T.unpack kwd) $ do
try do
_ <- string $ T.unpack kwd
notFollowedBy letter
spaces
-- | An identifier is a sequence of letters which is not a keyword.
identifier :: Parser Text
identifier = label "identifier" $ do
notFollowedBy anyKeyword
T.pack <$> (many1 letter <* spaces)
where anyKeyword = choice $ map keyword keywords
variable :: Parser AST
variable = label "variable" $ Var <$> identifier
tvariable :: Parser Type
tvariable = label "variable" $ TVar <$> identifier
many1' :: Parser a -> Parser (NonEmpty a)
many1' p = fromList <$> many1 p
many2 :: Parser a -> Parser (a, NonEmpty a)
many2 p = (,) <$> p <*> many1' p
grouping :: Parser AST
grouping = label "grouping" $ between (token '(') (token ')') ambiguous
tgrouping :: Parser Type
tgrouping = label "grouping" $ between (token '(') (token ')') tambiguous
application :: Parser AST
application = label "application" $ uncurry App <$> many2 block
tapplication :: Parser Type
tapplication = label "application" $ uncurry tapp' <$> many2 tblock
where tapp' t1 (t2 :| ts) = tapp (t1 : t2 : ts)
abstraction :: Parser AST
abstraction = label "lambda abstraction" $ Abs <$> between lambda (token '.') (many1' identifier) <*> ambiguous
where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces
definition :: Parser (Def Parse)
definition = label "definition" $ do
name <- identifier
token '='
value <- ambiguous
pure (name, value)
let_ :: Parser AST
let_ = label "let expression" $ letrecstar <|> letstar
where
letrecstar = LetRecP <$> between (try (keyword "letrec")) (keyword "in") definition <*> ambiguous
letstar = Let <$> between (keyword "let") (keyword "in") definitions <*> ambiguous
definitions :: Parser (NonEmpty (Def Parse))
definitions = fromList <$> sepBy1 definition (token ';')
ctr :: Parser AST
ctr = label "data constructor" $ pair <|> unit <|> either <|> nat <|> list <|> str
where
unit, pairCtr, tuple, either, left, right,
zero, succ, natLit, consCtr, cons, charCtr, charLit, strLit :: Parser AST
unit = Ctr CUnit [] <$ keyword "()"
pair = pairCtr <|> tuple
pairCtr = Ctr CPair [] <$ keyword "(,)"
tuple = try $ between (token '(') (token ')') do
e1 <- ambiguous
token ','
e2 <- ambiguous
pure $ Ctr CPair [e1, e2]
either = left <|> right
left = Ctr CLeft [] <$ keyword "Left"
right = Ctr CRight [] <$ keyword "Right"
nat = zero <|> succ <|> natLit
zero = Ctr CZero [] <$ keyword "Z"
succ = Ctr CSucc [] <$ keyword "S"
natLit = (PNat . read <$> many1 digit) <* spaces
list = cons <|> consCtr <|> listLit
consCtr = Ctr CCons [] <$ keyword "(::)"
cons = try $ between (token '(') (token ')') do
e1 <- ambiguous
keyword "::"
e2 <- ambiguous
pure $ Ctr CCons [e1, e2]
listLit = fmap PList $ between (token '[') (token ']') $ sepEndBy ambiguous (token ',')
str = charCtr <|> charLit <|> strLit
charCtr = Ctr CChar [] <$ keyword "Char"
charLit = fmap PChar $ char '\'' *> anyChar <* spaces
strLit = fmap (PStr . T.pack) $ between (token '"') (token '"') $ many (noneOf "\"")
pat :: Parser (Pat Parse)
pat = label "case alternate" $ do
(c, ns) <- label "pattern" $
pair <|> unit <|> left <|> right <|> zero <|> succ <|> nil <|> cons <|> char'
keyword "->"
e <- ambiguous
pure $ Pat c ns e
where
pair = try $ between (token '(') (token ')') do
e1 <- identifier
token ','
e2 <- identifier
pure (CPair, [e1, e2])
unit = (CUnit, []) <$ keyword "()"
left = do
keyword "Left"
e <- identifier
pure (CLeft, [e])
right = do
keyword "Right"
e <- identifier
pure (CRight, [e])
zero = (CZero, []) <$ keyword "Z"
succ = do
keyword "S"
e <- identifier
pure (CSucc, [e])
nil = (CNil, []) <$ keyword "[]"
cons = try $ between (token '(') (token ')') do
e1 <- identifier
keyword "::"
e2 <- identifier
pure (CCons, [e1, e2])
char' = do
keyword "Char"
e <- identifier
pure (CChar, [e])
case_ :: Parser AST
case_ = label "case patterns" $ do
token '{'
pats <- sepEndBy pat (token ';')
token '}'
pure $ Case pats
ann :: Parser AST
ann = label "type annotation" $ do
e <- block
token ':'
t <- tambiguous
pure (Ann () e t)
hole :: Parser AST
hole = label "hole" $ HoleP <$ token '_'
tlist :: Parser Type
tlist = between (token '[') (token ']') $ ((TApp TList <$> tambiguous) <|> pure TList)
tinfix :: Parser Type
tinfix = buildExpressionParser ttable tblock
where
ttable :: [[Operator Text () Identity Type]]
ttable = [ [Infix (binop TAbs <$ arrSym) AssocRight]
, [Infix (binop TProd <$ token '*') AssocRight]
, [Infix (binop TSum <$ token '+') AssocRight]
]
arrSym :: Parser ()
arrSym = token '→' <|> keyword "->"
binop :: Type -> Type -> Type -> Type
binop c t1 t2 = TApp (TApp c t1) t2
tctr :: Parser Type
tctr = tlist <|> tunit <|> tvoid <|> tnat <|> tchar
where
tunit = TUnit <$ (keyword "Unit" <|> keyword "")
tvoid = TVoid <$ (keyword "Void" <|> keyword "")
tnat = TNat <$ (keyword "Nat" <|> keyword "")
tchar = TChar <$ keyword "Char"
tfinite :: Parser Type
tfinite = tvariable <|> tlist <|> tctr <|> tgrouping
tblock :: Parser Type
tblock = tfinite
tambiguous :: Parser Type
tambiguous = try tinfix <|> try tapplication <|> tblock
tforall :: Parser Scheme
tforall = do
keyword "forall" <|> token '∀'
names <- many1 (identifier <* spaces)
token '.'
ty <- tambiguous
pure $ TForall names ty
scheme :: Parser Scheme
scheme = tforall <|> (TForall [] <$> tambiguous)
-- | Guaranteed to consume a finite amount of input
finite :: Parser AST
finite = label "finite expression" $ variable <|> hole <|> ctr <|> case_ <|> grouping
-- | Guaranteed to consume input, but may continue until it reaches a terminator
block :: Parser AST
block = label "block expression" $ abstraction <|> let_ <|> finite
-- | Not guaranteed to consume input at all, may continue until it reaches a terminator
ambiguous :: Parser AST
ambiguous = label "any expression" $ try ann <|> try application <|> block
typeParser :: Parser Type
typeParser = tambiguous
schemeParser :: Parser Scheme
schemeParser = scheme
astParser :: Parser AST
astParser = ambiguous
parseAST :: Text -> Either ParseError AST
parseAST = parse (spaces *> ambiguous <* eof) "input"
type Declaration = (Text, Maybe Type, AST)
definitionAnn :: Parser Declaration
definitionAnn = do
name <- identifier
ty <- optionMaybe $ token ':' *> tambiguous
token '='
e <- ambiguous
pure (name, ty, e)
declaration :: Parser Declaration
declaration = notFollowedBy (try let_) >> (declrec <|> decl)
where
declrec = do
try $ keyword "letrec"
(name, ty, expr) <- definitionAnn
pure (name, ty, LetRecP (name, expr) (Var name))
decl = do
keyword "let"
definitionAnn
-- | A program is a series of declarations and expressions to execute.
type ProgramAST = [Declaration]
type TopLevelAST = [Either Declaration AST]
topLevel :: Parser (Either Declaration AST)
topLevel = try (Left <$> declaration) <|> (Right <$> ambiguous)
topLevelParser :: Parser TopLevelAST
topLevelParser = spaces *> sepEndBy topLevel (token ';') <* eof
---
--- This is a very long module which does not even document the
--- syntactic constructs which it is implementing.
--- On the other hand, 'Ivo.Syntax.Base' goes into great detail'
--- about the syntax while hardly containing any actual code.
--- Thus, to assist with navigation, the order
--- in which parsers are defined in this module
--- is the same as the order in which they are described in 'Ivo.Syntax.Base'.
--- I would recommend keeping both files open at once while working on this code
--- so you can cross-reference the informal description of the syntax
--- with the parsers.
---
shebang :: Parser ()
shebang = do
shebang = label "shebang" do
try $ keyword "#!"
skipMany (noneOf "\n")
spaces
programParser :: Parser ProgramAST
programParser = shebang *> sepEndBy declaration (token ';') <* eof
-- | 'Scope'
scope :: Parser Scope
scope = Scope <$> many (topLevel <* token ";")
parseTopLevel :: Text -> Either ParseError TopLevelAST
parseTopLevel = parse topLevelParser "input"
topLevel, tlOpen, tlDef, tlData, tlAxiom, tlSig :: Parser TopLevel
parseProgram :: Text -> Either ParseError ProgramAST
parseProgram = parse programParser "input"
-- | 'TopLevel'
topLevel = tlOpen <|> tlDef <|> tlData <|> tlAxiom <|> tlSig
-- | 'Open'
tlOpen = label "open statement" $
Open <$> publicOrPrivate
<*> ambiguous
<*> openUsing
<*> openHiding
<*> openRenaming
where
openUsing, openHiding :: Parser [(Text, Maybe (Maybe [Text]))]
openRenaming :: Parser [(Text, Text)]
openUsing = option [] do
keyword "using"
flexibleParens openId
openHiding = option [] do
keyword "hiding"
flexibleParens openId
openId = do
name <- identifier
ctrs <- optionMaybe $ openAll <|> openCtrs
pure (name, ctrs)
openRenaming = option [] do
keyword "renaming"
flexibleParens $ cobetween identifier tkArr identifier
openAll, openCtrs :: Parser (Maybe [Text])
openAll = Nothing <$ keyword ".."
openCtrs = optionMaybe $ flexibleParens identifier
-- | 'Define'
tlDef = Define <$> publicOrPrivate <*> definition
-- | 'TLData'
tlData = TLData <$> data_
-- | 'TLAxiom'
tlAxiom = TLAxiom <$> axiom
-- | 'TLSig'
tlSig = TLSig <$> sig
-- | 'PublicOrPrivate'
publicOrPrivate :: Parser PublicOrPrivate
publicOrPrivate = (Public <$ keyword "pub") <|> pure Private
-- | 'Definition'
definition :: Parser Definition
definition = label "definition" do
name <- identifier
ty <- optionMaybe typeSig
token "="
expr <- ambiguous
pure $ Definition name ty expr
ambiguous, block, finite :: Parser Expr
-- | 'Expr's which are not guaranteed to consume input before recursing.
ambiguous = label "expression" $
exAccess <|> exArrow <|> exAnn <|> exAppI <|> exApp <|> exTypeApp
-- | 'Expr's with a terminal symbol on its left.
block = label "block expression" $
exData <|> exAxiom <|> exForall <|> exTypeLam <|> exBlockLam <|>
exLet <|> exBlockCase <|> finite
-- | 'Expr's with a terminal symbol on both the left /and/ right.
finite = label "finite expression" $
exSig <|> exMod <|> exHole <|> exFiniteCase <|> exFiniteLam <|>
exLit <|> exVar <|> exGroup
exAccess, exArrow, exAnn, exAppI, exApp, exTypeApp,
exData, exAxiom, exForall, exTypeLam, exBlockLam, exFiniteLam, exLet,
exSig, exMod, exHole, exBlockCase, exFiniteCase, exLit, exVar, exGroup
:: Parser Expr
exGroup = label "parenthetical expression" $ betweenParens ambiguous
typeSig, kindSig, modSig :: Parser Type
typeSig = label "type signature" $ keyword ":" *> ambiguous
kindSig = label "kind signature" typeSig
modSig = label "signature" typeSig
-- | 'Data'
exData = Data <$> data_
data_ :: Parser Expr
data_ = label "data expression" do
keyword "data"
ambiguous
-- | 'Axiom'
exAxiom = Axiom <$> axiom
axiom :: Parser Expr
axiom = label "axiom expression" do
keyword "axiom"
ambiguous
-- | 'SigE'
exSig = SigE <$> sig
-- | 'Mod'
exMod = label "module expression" do
keyword "mod"
params <- sigParams
sig <- optionMaybe modSig
body <- flexibleBraces topLevel
pure $ Mod params sig body
-- | 'Access'
exAccess = label "field access" $ try do
expr <- block
tkColCol
name <- identifier
pure $ Access expr name
-- | @∷@
tkColCol :: Parser ()
tkColCol = label "" $ token "" <|> token "::"
-- | 'Forall'
exForall = label "∀ type" $ do
tkForall
uncurry Forall <$> typeBinder
-- | @∀@
tkForall :: Parser ()
tkForall = label "" $ keyword "" <|> keyword "forall"
-- | Everything about @forall@ and @^@ is the same
-- except for the token and whether a type or expr
-- (which are also syntactically the same object)
-- comes next.
--
-- This factors out all of that stuff they have in common.
typeBinder :: Parser (NonEmpty (Either (NonEmpty Text, Kind) Text), Expr)
typeBinder = do
vars <- many1' typeBinder'
tkArr
ty <- ambiguous
pure (vars, ty)
where
typeBinder', kindedBinder, plainBinder
:: Parser (Either (NonEmpty Text, Kind) Text)
typeBinder' = kindedBinder <|> plainBinder
kindedBinder = betweenParens $ do
names <- many1' identifier
kind <- kindSig
pure $ Left (names, kind)
plainBinder = Right <$> identifier
-- | 'Arrow'
exArrow = label "→ type" $ try do
(names, argTy) <- arrowArgs
tkArr
rTy <- ambiguous
pure $ Arrow names argTy rTy
where
arrowArgs, namedArgs, typeArg :: Parser ([Text], Type)
arrowArgs = namedArgs <|> typeArg
namedArgs = betweenParens do
names <- many1 identifier
ty <- kindSig
pure (names, ty)
typeArg = ([],) <$> block
-- | @→@
tkArr :: Parser ()
tkArr = label "" $ keyword "" <|> keyword "->"
-- | 'Ann'
exAnn = label "annotated expression" $ try do
expr <- block
ty <- typeSig
pure $ Ann expr ty
-- | 'Hole'
exHole = label "hole" $ Hole <$ keyword "_"
-- | 'TypeLam'
exTypeLam = label "type Λ" $ do
tkBigLam
uncurry TypeLam <$> typeBinder
-- | @Λ@
tkBigLam :: Parser ()
tkBigLam = label "Λ" $ keyword "Λ" <|> keyword "^"
-- | 'TypeApp'
exTypeApp = label "type application" $ try do
expr <- block
token "@"
arg <- ambiguous
pure $ TypeApp expr arg
-- | 'Lam' with only one undelimited case branch
exBlockLam = label "λ expression" $ try do
args <- many pattern_
tkArr
body <- ambiguous
pure $ LamBlock args body
-- | 'Lam' with delimited case branches
exFiniteLam = label "λ-case expression" $ try do
simpleArgs <- many pattern_
body <- caseBranches
pure $ Lam simpleArgs body
tkLam :: Parser ()
tkLam = label "λ" $ keyword "λ" <|> keyword "\\"
-- | 'App'
exApp = label "application" $ try do
ef <- finite
exs <- many finite
exfinal <- block
pure $ App ef (snoc' exs exfinal)
-- | 'AppI'
exAppI = label "infix application" $
fail "infix expressions not yet supported"
-- | 'Let'
exLet = label "let expression" do
keyword "let"
decls <- flexibleSepBy1 (token ";") topLevel
keyword "in"
body <- ambiguous
pure $ Let decls body
-- | 'Case' with only one undelimited case branch
exBlockCase = label "case expression" $ try do
keyword "case"
exprs <- flexibleSepBy (keyword "|") ambiguous
body <- caseBranches
pure $ Case exprs body
-- | 'Case'
exFiniteCase = label "case-of expression" $ try do
keyword "case"
exprs <- flexibleSepBy1 (keyword "|") ambiguous
keyword "of"
pats <- flexibleSepBy1 (keyword "|") pattern_
tkArr
body <- ambiguous
pure $ CaseBlock exprs pats body
-- | 'Lit'
exLit = label "literal" $ Lit <$> lit ambiguous
-- | 'Var'
exVar = label "variable" $ Var <$> identifier
-- | 'Sig'
sig, sigADT, sigGADT, sigSig :: Parser Sig
sig = label "signature" $ sigADT <|> sigGADT <|> sigSig
sigParams :: Parser [Either (NonEmpty Text, Kind) Text]
sigParams = label "signature parameters" $ many $ annParam <|> unannParam
where
annParam, unannParam :: Parser (Either (NonEmpty Text, Kind) Text)
annParam = betweenParens do
names <- many1' identifier
kind <- kindSig
pure $ Left (names, kind)
unannParam = Right <$> identifier
-- | 'ADT'
sigADT = label "adt" do
keyword "adt"
name <- identifier
params <- sigParams
kind <- optionMaybe kindSig
(codataQ, body) <- betweenBraces $
emptyData <|> emptyCodata <|> dataCtrs <|> codataElims
pure $ SigQs params $ ADT codataQ name kind body
where
emptyData, emptyCodata, dataCtrs, codataElims
:: Parser (Bool, [(Text, [Type])])
emptyData = try $ (False, []) <$ keyword "+"
emptyCodata = try $ (True, []) <$ keyword "&"
dataCtrs = try $ (False,) <$> flexibleSepBy1 (keyword "+") do
name <- identifier
ctrs <- many $ notFollowedBy (keyword "+") >> finite
pure (name, ctrs)
codataElims = try $ (True,) <$> flexibleSepBy1 (keyword "&") do
name <- identifier
elims <- many $ notFollowedBy (keyword "&") >> finite
pure (name, elims)
-- | 'GADT'
sigGADT = label "gadt" do
keyword "gadt"
name <- identifier
params <- sigParams
kind <- optionMaybe kindSig
ctrs <- flexibleBraces declaration
pure $ SigQs params $ GADT name kind ctrs
-- | 'Sig'
sigSig = label "sig" do
keyword "sig"
params <- sigParams
ctrs <- flexibleBraces $ (Left <$> declaration) <|> (Right <$> topLevel)
pure $ SigQs params $ Sig ctrs
declaration :: Parser (NonEmpty Text, Type)
declaration = label "declaration" $ (,) <$> many1' identifier <*> typeSig
pattern_, finitePattern, patVar, patIrr, patImp, patApp,
patAppI, patView, patLit, patGroup :: Parser Pattern
-- | 'Pattern'
pattern_ = label "pattern" $
patApp <|> patAppI <|> patView <|> finitePattern
finitePattern =
patIrr <|> patImp <|> patLit <|> patVar <|> patGroup
patGroup = betweenParens pattern_
-- | 'PatVar'
patVar = label "pattern variable" $ PatVar <$> identifier <*> optionMaybe typeSig
-- | 'Irrelevant'
patIrr = label "irrelevant pattern" $ Irrelevant <$ keyword "_"
-- | 'Impossible'
patImp = label "impossible pattern" $ Impossible <$ keyword "!"
-- | 'PatApp'
patApp = label "pattern application" $ do
name <- identifier
pats <- many1 finitePattern
pure $ PatApp name pats
-- FIXME: Implement infix pattern application
-- | 'PatAppI'
patAppI = label "infix pattern application" $
fail "infix pattern application not yet implemented"
-- | 'View'
patView = label "view pattern" $ try $ View <$> block <*> (tkArr *> pattern_)
-- | 'PatLit'
patLit = label "literal pattern" $ PatLit <$> lit pattern_
caseBranches :: Parser [CaseBranch]
caseBranches = label "case branches" $ flexibleBraces caseBranch
caseBranch :: Parser CaseBranch
caseRefinement, caseGuards, casePlain :: [Pattern] -> Parser CaseBranch
-- | 'CaseBranch'
caseBranch = label "case branch" do
pats <- many1 pattern_
caseRefinement pats <|> caseGuards pats <|> casePlain pats
-- FIXME: Implement case refinement
-- | 'Refinement'
caseRefinement pats = label "case refinement" $
fail "case refinement not implemented"
-- FIXME: Implement case guards
-- | 'Guards'
caseGuards pats = label "guard clauses" $
fail "case guards not yet implemented"
-- | 'PlainCase'
casePlain pats = do
tkArr
PlainCase pats <$> ambiguous
-- FIXME: Implement guard clauses
-- | 'Guard'
guard :: Parser Guard
guard = label "guard clause" $
fail "guard clauses not yet implemented"
lit, litList :: Parser a -> Parser (Lit a)
litInt, litChar, litStr :: Parser (Lit a)
-- | 'Lit'
lit m = label "literal" $ litInt <|> litChar <|> litStr <|> litList m
-- | 'LitInt'
litInt = label "integer literal" $ LitInt . read <$> try do
sign <- optionMaybe $ char '+' <|> char '-'
digits <- many1 digit
pure $ maybe digits (: digits) sign
-- | 'LitChar'
litChar = label "character literal" $
fmap LitChar $ between (char '\'') (char '\'') $ noneOf "\\'\n"
-- | 'LitStr'
litStr = label "string literal" $ fmap (LitStr . T.pack) $
between (char '"') (char '"') $ many1 $ noneOf "\\\"\n"
-- | 'LitList'
litList m = label "list literal" $ LitList <$> flexibleBrackets m
-- | 'FlexibleSeparatorSyntax'
flexibleSepBy :: Parser a -> Parser b -> Parser [b]
flexibleSepBy delim m = do
optional delim
sepEndBy m delim
-- | 'FlexibleSeparatorSyntax'
flexibleSepBy1 :: Parser a -> Parser b -> Parser [b]
flexibleSepBy1 delim m = do
optional delim
sepEndBy1 m delim
flexibleParens, flexibleBrackets, flexibleBraces :: Parser a -> Parser [a]
-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @(@ @)@
flexibleParens = betweenParens . flexibleSepBy (token ";")
-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @[@ @]@
flexibleBrackets = betweenBrackets . flexibleSepBy (token ";")
-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @{@ @}@
flexibleBraces = betweenBraces . flexibleSepBy (token ";")
---
--- Helper parsers which aren't explicitly specified by the AST
---
-- | An identifier is a sequence of characters with no token which is not a keyword.
identifier :: Parser Text
identifier = label "identifier" $ try do
notFollowedBy anyKeyword
name <- T.pack <$> (notFollowedBy anyToken *> many1 anyChar <* spaces)
if any ((`elem` keywords) . T.unpack) $ T.splitOn "_" name
then fail "identifier contained keywords"
else pure name
token :: String -> Parser ()
token tk = label tk $ string tk *> spaces
anyToken :: Parser ()
anyToken = label "any token" $ choice $ map token tokens
-- | Tokens which are reserved by syntax
-- and may not appear /anywhere/ in an identifier.
tokens :: [String]
tokens =
[ "//", "/*", "*/"
, "(", ")", "{", "}"
, ";", "", "::", "@"
, " ", "\r", "\n", "\t"
, "```"
]
-- | A keyword is an exact string which is not part of an identifier.
keyword :: String -> Parser ()
keyword kwd = label kwd $ do
try do
_ <- string kwd
notFollowedBy (notFollowedBy anyToken >> anyChar)
spaces
anyKeyword :: Parser ()
anyKeyword = label "any keyword" $ choice $ map keyword keywords
-- | Keywords are reserved by syntax and are not allowed to be used as identifiers
-- or as a part of a `_`-separated identifier.
--
-- However, they may be used as /part/ of an identifier.
keywords :: [String]
keywords =
[ "open", "using", "hiding", "renaming", "..", "pub"
, "data", "axiom", "adt", "gadt", "sig", "mod"
, "forall"
, "let", "in", "case"
, "[", "]"
, ":", "", "Λ", "^", "λ", "\\", "", "->"
, "|", "?", "!", "_", "..."
]
spaces :: Parser ()
spaces = label "whitespace or comments" $
Text.Parsec.spaces >> optional (try (comment >> spaces))
comment, lineComment, blockComment :: Parser ()
comment = label "comment" $ lineComment <|> blockComment
lineComment = label "line comment" $ do
_ <- try (string "//")
_ <- many1 (noneOf "\n")
pure ()
blockComment = label "block comment" $ do
_ <- try (string "/*")
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
_ <- string "*/"
pure ()
-- | The opposite of `between`: ignore the middle, keep the sides
cobetween :: Parser a -> Parser b -> Parser c -> Parser (a, c)
cobetween a b c = do
x <- a
_ <- b
y <- c
pure (x, y)
-- the parens/brackets/braces word choice is totally meaningless
betweenParens, betweenBrackets, betweenBraces :: Parser a -> Parser a
-- | Between @(@ @)@
betweenParens = between (token "(") (token ")")
-- | Between @[@ @]@
betweenBrackets = between (keyword "[") (keyword "]")
-- | Between @{@ @}@
betweenBraces = between (keyword "{") (keyword "}")
many1' :: Parser a -> Parser (NonEmpty a)
many1' = fmap fromList . many1
snoc' :: [a] -> a -> NonEmpty a
snoc' xs x = fromList $ xs ++ [x]
label :: String -> Parser a -> Parser a
label = flip Text.Parsec.label

View File

@ -1,4 +1,7 @@
module Ivo.Syntax.Printer (unparseAST, unparseType, unparseScheme) where
-- | Turn abstract syntax into the corresponding concrete syntax.
--
-- This is /not/ a pretty-printer; it uses minimal whitespace)
module Ivo.Syntax.Printer (unparseScope, unparseTopLevel, unparseExpr) where
import Ivo.Syntax.Base
@ -10,6 +13,99 @@ import Data.Text.Lazy (fromStrict, toStrict, intercalate, unwords, singleton)
import Data.Text.Lazy.Builder (Builder, fromText, fromLazyText, toLazyText, fromString)
import Prelude hiding (unwords)
unparseScope :: Scope -> Text
unparseScope = unambiguous . scope
unparseTopLevel :: TopLevel -> Text
unparseTopLevel = unambiguous . topLevel
unparseExpr :: Expr -> Text
unparseExpr = unambiguous . expr
type Unparser a = a -> Tagged Builder
scope :: Unparser Scope
scope (Scope tls) = tag Ambiguous $ unmany (topLevel <? ";\n") tls
(<?) :: Unparser a -> Text -> Unparser a
u <? txt = \x -> u x <> fromText txt
topLevel :: Unparser TopLevel
topLevel = \case
Open pub expr use hide rename ->
publicOrPrivate pub <>
unambiguousExpr expr <> " " <>
openUsing use <> " "
openHiding hide <> " "
openRenaming rename
Define pub def ->
publicOrPrivate pub <>
definition def
TLData expr -> data_ expr
TLAxiom expr -> axiom expr
TLSig s -> sig s
publicOrPrivate :: PublicOrPrivate -> Builder
publicOrPrivate Public = "pub "
publicOrPrivate Private = ""
openUsing, openHiding :: [(Text, Maybe (Maybe [Text]))] -> Builder
-- FIXME
openUsing _ = "using (some stuff) "
-- FIXME
openHiding _ = "hiding (some stuff) "
-- FIXME
openRenaming :: [(Text, Text)]
openRenaming _ = "renaming (some stuff) "
definition :: Unparser Definition
definition (Definition name ty expr) =
fromText name <> " " <> typeSig ty <> "= " <> unambiguousExpr expr
expr :: Unparser Expr
expr = \case
Data expr -> tag Block $ data_ expr
Axiom expr -> tag Block $ axiom expr
SigE s -> tag Block $ sig s
-- FIXME
Mod _ _ _ -> tag Block $ error "I don't know how to unparse modules"
Access expr name -> tag Ambiguous $ blockExpr expr <> "::" <> fromText name
Forall binders ty -> tag Block $ "" <> typeBinders binders <> "" <> ambiguousExpr ty
Arrow [] arg ty -> tag Ambiguous $ blockExpr arg <> "" <> ambiguousExpr ty
Ann expr ty -> tag Ambiguous $ blockExpr expr <> ": " <> ambiguousExpr ty
Hole -> tag Finite "_"
TypeLam binders ty -> tag Block $ "Λ " <> typeBinders binders <> "" <> ambiguousExpr ty
TypeApp expr ty -> tag Ambiguous $ blockExpr <> "@" <> finiteExpr ty
Lam pats cases -> tag Block $ "λ " <> unmany1 pattern_ pats <> caseBranches cases
data_ = undefined
axiom = undefined
typeBinders :: Unparser (NonEmpty (Either (NonEmpty Text, Kind) Text))
typeBinders = error "Whatever"
sig :: Unparser Sig
sig = error "I don't know how to unparse signatures"
pattern_ :: Unparser Pattern
pattern_ = error "I don't know how to unparse patterns"
caseBranch :: Unparser CaseBranch
caseBranch = error "I don't know how to unparse case branches"
caseBranches :: Unparser [CaseBranch]
caseBranches = error "I don't know how to unparse case branches"
guard :: Unparser Guard
guard = error "I don't know how to unparse guard clauses"
lit :: Unparser a -> Unparser (Lit a)
lit u = error "I don't know how to unparse literals"
-- I'm surprised this isn't in base somewhere.
unsnoc :: NonEmpty a -> ([a], a)
unsnoc = cata \case
@ -47,78 +143,13 @@ ambiguous :: Tagged Builder -> Builder
ambiguous (Finite, t) = t
ambiguous (_, t) = group t
-- | Turn an abstract syntax tree into the corresponding concrete syntax.
--
-- This is *not* a pretty-printer; it uses minimal whitespace.
unparseAST :: AST -> Text
unparseAST = toStrict . toLazyText . snd . cata \case
VarF name -> tag Finite $ fromText name
AppF ef exs -> unparseApp ef exs
AbsF names body -> tag Block $
let names' = fromLazyText (unwords $ map fromStrict $ toList names)
in "λ" <> names' <> ". " <> unambiguous body
LetFP defs body -> tag Block $ "let " <> unparseDefs defs <> " in " <> unambiguous body
LetRecFP def body -> tag Block $ "letrec " <> unparseDef def <> " in " <> unambiguous body
CtrF ctr e -> unparseCtr ctr e
CaseF pats ->
let pats' = fromLazyText $ intercalate "; " $ map (toLazyText . unparsePat) pats
in tag Finite $ "{ " <> pats' <> " }"
AnnF () e t -> tag Ambiguous $ final e <> " : " <> fromText (unparseType t)
PNatF n -> tag Finite $ fromString $ show n
PListF es ->
let es' = fromLazyText $ intercalate ", " $ map (toLazyText . unambiguous) es
in tag Finite $ "[" <> es' <> "]"
PStrF s -> tag Finite $ "\"" <> fromText s <> "\""
PCharF c -> tag Finite $ "'" <> fromLazyText (singleton c)
HoleFP -> tag Finite "_"
where
unparseApp :: Tagged Builder -> NonEmpty (Tagged Builder) -> Tagged Builder
unparseApp ef (unsnoc -> (exs, efinal))
= tag Ambiguous $ foldr (\e es' -> ambiguous e <> " " <> es') (final efinal) (ef : exs)
ambiguousExpr, blockExpr, finiteExpr :: Expr -> Builder
unparseDef (name, val) = fromText name <> " = " <> unambiguous val
unparseDefs defs = fromLazyText (intercalate "; " $ map (toLazyText . unparseDef) $ toList defs)
ambiguousExpr = ambiguous . expr
unparseCtr :: Ctr -> [Tagged Builder] -> Tagged Builder
-- Fully-applied special syntax forms
unparseCtr CPair [x, y] = tag Finite $ "(" <> unambiguous x <> ", " <> unambiguous y <> ")"
unparseCtr CCons [x, y] = tag Finite $ "(" <> unambiguous x <> " :: " <> unambiguous y <> ")"
-- Partially-applied syntax forms
unparseCtr CUnit [] = tag Finite "()"
unparseCtr CPair [] = tag Finite "(,)"
unparseCtr CLeft [] = tag Finite "Left"
unparseCtr CRight [] = tag Finite "Right"
unparseCtr CZero [] = tag Finite "Z"
unparseCtr CSucc [] = tag Finite "S"
unparseCtr CNil [] = tag Finite "[]"
unparseCtr CCons [] = tag Finite "(::)"
unparseCtr CChar [] = tag Finite "Char"
unparseCtr ctr (x:xs) = unparseApp (unparseCtr ctr []) (x :| xs)
blockExpr = final . expr
unparsePat (Pat ctr ns e)
= unambiguous (unparseCtr ctr (map (tag Finite . fromText) ns)) <> " -> " <> unambiguous e
finiteExpr = unambiguous . expr
-- HACK
pattern TApp2 :: Type -> Type -> Type -> Type
pattern TApp2 tf tx ty = TApp (TApp tf tx) ty
-- TODO: Improve these printers.
unparseType :: Type -> Text
unparseType (TVar name) = name
unparseType (TApp2 TAbs a b) = "(" <> unparseType a <> " -> " <> unparseType b <> ")"
unparseType (TApp2 TProd a b) = "(" <> unparseType a <> " * " <> unparseType b <> ")"
unparseType (TApp2 TSum a b) = "(" <> unparseType a <> " + " <> unparseType b <> ")"
unparseType (TApp TList a) = "[" <> unparseType a <> "]"
unparseType (TApp a b) = "(" <> unparseType a <> " " <> unparseType b <> ")"
unparseType TAbs = "(->)"
unparseType TProd = "(*)"
unparseType TSum = "(+)"
unparseType TUnit = ""
unparseType TVoid = ""
unparseType TNat = "Nat"
unparseType TList = "[]"
unparseType TChar = "Char"
unparseScheme :: Scheme -> Text
unparseScheme (TForall [] t) = unparseType t
unparseScheme (TForall names t) = "" <> T.unwords names <> ". " <> unparseType t
unmany :: Foldable t => Unparser a -> t a -> Builder
unmany u xs = error "fuck it, implement this later"