WIP on new syntax

new-syntax-wip
James T. Martin 2021-03-27 20:13:25 -07:00
parent b337ecb094
commit b53b1eb4d7
Signed by: james
GPG Key ID: 4B7F3DA9351E577C
4 changed files with 1610 additions and 495 deletions

View File

@ -41,6 +41,7 @@ default-extensions:
- PatternSynonyms - PatternSynonyms
- ScopedTypeVariables - ScopedTypeVariables
- StandaloneDeriving - StandaloneDeriving
- TupleSections
- TypeFamilies - TypeFamilies
- ViewPatterns - ViewPatterns

File diff suppressed because it is too large Load Diff

View File

@ -1,305 +1,604 @@
module Ivo.Syntax.Parser module Ivo.Syntax.Parser
( ParseError, parse ( ParseError, parse
, Declaration, TopLevelAST, ProgramAST , programParser, replParser
, parseAST, parseTopLevel, parseProgram
, typeParser, schemeParser, astParser, topLevelParser, programParser
) where ) where
import Ivo.Syntax.Base import Ivo.Syntax.Base
import Data.Functor.Identity (Identity)
import Data.List.NonEmpty (fromList) import Data.List.NonEmpty (fromList)
import Data.Text qualified as T import Data.Text qualified as T
import Prelude hiding (succ, either) import Prelude hiding (succ, either)
import Text.Parsec hiding (label, token, spaces) import Text.Parsec hiding (label, token, spaces, tokens, anyToken)
import Text.Parsec qualified import Text.Parsec qualified
import Text.Parsec.Expr
import Text.Parsec.Text (Parser) import Text.Parsec.Text (Parser)
spaces :: Parser () -- | Parse the contents of a file containing Ivo source code,
spaces = Text.Parsec.spaces >> optional (try (comment >> spaces)) -- with an optional shebang.
where --
comment, lineComment, blockComment :: Parser () -- See 'Scope'.
comment = blockComment <|> lineComment programParser :: Parser Scope
lineComment = label "line comment" $ do programParser = do
_ <- try (string "//") optional shebang
_ <- many1 (noneOf "\n") scope
pure ()
blockComment = label "block comment" $ do
_ <- try (string "/*")
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
_ <- string "*/"
pure ()
label :: String -> Parser a -> Parser a -- | Parse REPL input (which consists of top-level statements and expressions).
label = flip Text.Parsec.label --
-- See 'TopLevel' and 'Expr'.
replParser :: Parser [Either TopLevel Expr]
replParser = sepEndBy ((Left <$> topLevel) <|> (Right <$> ambiguous)) (token ";")
token :: Char -> Parser () ---
token ch = label [ch] $ char ch *> spaces --- This is a very long module which does not even document the
--- syntactic constructs which it is implementing.
keywords :: [Text] --- On the other hand, 'Ivo.Syntax.Base' goes into great detail'
keywords = ["let", "in", "Left", "Right", "S", "Z", "forall", "Char", "Void", "Unit", "Nat", "Char"] --- about the syntax while hardly containing any actual code.
--- Thus, to assist with navigation, the order
-- | A keyword is an exact string which is not part of an identifier. --- in which parsers are defined in this module
keyword :: Text -> Parser () --- is the same as the order in which they are described in 'Ivo.Syntax.Base'.
keyword kwd = label (T.unpack kwd) $ do --- I would recommend keeping both files open at once while working on this code
try do --- so you can cross-reference the informal description of the syntax
_ <- string $ T.unpack kwd --- with the parsers.
notFollowedBy letter ---
spaces
-- | An identifier is a sequence of letters which is not a keyword.
identifier :: Parser Text
identifier = label "identifier" $ do
notFollowedBy anyKeyword
T.pack <$> (many1 letter <* spaces)
where anyKeyword = choice $ map keyword keywords
variable :: Parser AST
variable = label "variable" $ Var <$> identifier
tvariable :: Parser Type
tvariable = label "variable" $ TVar <$> identifier
many1' :: Parser a -> Parser (NonEmpty a)
many1' p = fromList <$> many1 p
many2 :: Parser a -> Parser (a, NonEmpty a)
many2 p = (,) <$> p <*> many1' p
grouping :: Parser AST
grouping = label "grouping" $ between (token '(') (token ')') ambiguous
tgrouping :: Parser Type
tgrouping = label "grouping" $ between (token '(') (token ')') tambiguous
application :: Parser AST
application = label "application" $ uncurry App <$> many2 block
tapplication :: Parser Type
tapplication = label "application" $ uncurry tapp' <$> many2 tblock
where tapp' t1 (t2 :| ts) = tapp (t1 : t2 : ts)
abstraction :: Parser AST
abstraction = label "lambda abstraction" $ Abs <$> between lambda (token '.') (many1' identifier) <*> ambiguous
where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces
definition :: Parser (Def Parse)
definition = label "definition" $ do
name <- identifier
token '='
value <- ambiguous
pure (name, value)
let_ :: Parser AST
let_ = label "let expression" $ letrecstar <|> letstar
where
letrecstar = LetRecP <$> between (try (keyword "letrec")) (keyword "in") definition <*> ambiguous
letstar = Let <$> between (keyword "let") (keyword "in") definitions <*> ambiguous
definitions :: Parser (NonEmpty (Def Parse))
definitions = fromList <$> sepBy1 definition (token ';')
ctr :: Parser AST
ctr = label "data constructor" $ pair <|> unit <|> either <|> nat <|> list <|> str
where
unit, pairCtr, tuple, either, left, right,
zero, succ, natLit, consCtr, cons, charCtr, charLit, strLit :: Parser AST
unit = Ctr CUnit [] <$ keyword "()"
pair = pairCtr <|> tuple
pairCtr = Ctr CPair [] <$ keyword "(,)"
tuple = try $ between (token '(') (token ')') do
e1 <- ambiguous
token ','
e2 <- ambiguous
pure $ Ctr CPair [e1, e2]
either = left <|> right
left = Ctr CLeft [] <$ keyword "Left"
right = Ctr CRight [] <$ keyword "Right"
nat = zero <|> succ <|> natLit
zero = Ctr CZero [] <$ keyword "Z"
succ = Ctr CSucc [] <$ keyword "S"
natLit = (PNat . read <$> many1 digit) <* spaces
list = cons <|> consCtr <|> listLit
consCtr = Ctr CCons [] <$ keyword "(::)"
cons = try $ between (token '(') (token ')') do
e1 <- ambiguous
keyword "::"
e2 <- ambiguous
pure $ Ctr CCons [e1, e2]
listLit = fmap PList $ between (token '[') (token ']') $ sepEndBy ambiguous (token ',')
str = charCtr <|> charLit <|> strLit
charCtr = Ctr CChar [] <$ keyword "Char"
charLit = fmap PChar $ char '\'' *> anyChar <* spaces
strLit = fmap (PStr . T.pack) $ between (token '"') (token '"') $ many (noneOf "\"")
pat :: Parser (Pat Parse)
pat = label "case alternate" $ do
(c, ns) <- label "pattern" $
pair <|> unit <|> left <|> right <|> zero <|> succ <|> nil <|> cons <|> char'
keyword "->"
e <- ambiguous
pure $ Pat c ns e
where
pair = try $ between (token '(') (token ')') do
e1 <- identifier
token ','
e2 <- identifier
pure (CPair, [e1, e2])
unit = (CUnit, []) <$ keyword "()"
left = do
keyword "Left"
e <- identifier
pure (CLeft, [e])
right = do
keyword "Right"
e <- identifier
pure (CRight, [e])
zero = (CZero, []) <$ keyword "Z"
succ = do
keyword "S"
e <- identifier
pure (CSucc, [e])
nil = (CNil, []) <$ keyword "[]"
cons = try $ between (token '(') (token ')') do
e1 <- identifier
keyword "::"
e2 <- identifier
pure (CCons, [e1, e2])
char' = do
keyword "Char"
e <- identifier
pure (CChar, [e])
case_ :: Parser AST
case_ = label "case patterns" $ do
token '{'
pats <- sepEndBy pat (token ';')
token '}'
pure $ Case pats
ann :: Parser AST
ann = label "type annotation" $ do
e <- block
token ':'
t <- tambiguous
pure (Ann () e t)
hole :: Parser AST
hole = label "hole" $ HoleP <$ token '_'
tlist :: Parser Type
tlist = between (token '[') (token ']') $ ((TApp TList <$> tambiguous) <|> pure TList)
tinfix :: Parser Type
tinfix = buildExpressionParser ttable tblock
where
ttable :: [[Operator Text () Identity Type]]
ttable = [ [Infix (binop TAbs <$ arrSym) AssocRight]
, [Infix (binop TProd <$ token '*') AssocRight]
, [Infix (binop TSum <$ token '+') AssocRight]
]
arrSym :: Parser ()
arrSym = token '→' <|> keyword "->"
binop :: Type -> Type -> Type -> Type
binop c t1 t2 = TApp (TApp c t1) t2
tctr :: Parser Type
tctr = tlist <|> tunit <|> tvoid <|> tnat <|> tchar
where
tunit = TUnit <$ (keyword "Unit" <|> keyword "")
tvoid = TVoid <$ (keyword "Void" <|> keyword "")
tnat = TNat <$ (keyword "Nat" <|> keyword "")
tchar = TChar <$ keyword "Char"
tfinite :: Parser Type
tfinite = tvariable <|> tlist <|> tctr <|> tgrouping
tblock :: Parser Type
tblock = tfinite
tambiguous :: Parser Type
tambiguous = try tinfix <|> try tapplication <|> tblock
tforall :: Parser Scheme
tforall = do
keyword "forall" <|> token '∀'
names <- many1 (identifier <* spaces)
token '.'
ty <- tambiguous
pure $ TForall names ty
scheme :: Parser Scheme
scheme = tforall <|> (TForall [] <$> tambiguous)
-- | Guaranteed to consume a finite amount of input
finite :: Parser AST
finite = label "finite expression" $ variable <|> hole <|> ctr <|> case_ <|> grouping
-- | Guaranteed to consume input, but may continue until it reaches a terminator
block :: Parser AST
block = label "block expression" $ abstraction <|> let_ <|> finite
-- | Not guaranteed to consume input at all, may continue until it reaches a terminator
ambiguous :: Parser AST
ambiguous = label "any expression" $ try ann <|> try application <|> block
typeParser :: Parser Type
typeParser = tambiguous
schemeParser :: Parser Scheme
schemeParser = scheme
astParser :: Parser AST
astParser = ambiguous
parseAST :: Text -> Either ParseError AST
parseAST = parse (spaces *> ambiguous <* eof) "input"
type Declaration = (Text, Maybe Type, AST)
definitionAnn :: Parser Declaration
definitionAnn = do
name <- identifier
ty <- optionMaybe $ token ':' *> tambiguous
token '='
e <- ambiguous
pure (name, ty, e)
declaration :: Parser Declaration
declaration = notFollowedBy (try let_) >> (declrec <|> decl)
where
declrec = do
try $ keyword "letrec"
(name, ty, expr) <- definitionAnn
pure (name, ty, LetRecP (name, expr) (Var name))
decl = do
keyword "let"
definitionAnn
-- | A program is a series of declarations and expressions to execute.
type ProgramAST = [Declaration]
type TopLevelAST = [Either Declaration AST]
topLevel :: Parser (Either Declaration AST)
topLevel = try (Left <$> declaration) <|> (Right <$> ambiguous)
topLevelParser :: Parser TopLevelAST
topLevelParser = spaces *> sepEndBy topLevel (token ';') <* eof
shebang :: Parser () shebang :: Parser ()
shebang = do shebang = label "shebang" do
try $ keyword "#!" try $ keyword "#!"
skipMany (noneOf "\n") skipMany (noneOf "\n")
spaces spaces
programParser :: Parser ProgramAST -- | 'Scope'
programParser = shebang *> sepEndBy declaration (token ';') <* eof scope :: Parser Scope
scope = Scope <$> many (topLevel <* token ";")
parseTopLevel :: Text -> Either ParseError TopLevelAST topLevel, tlOpen, tlDef, tlData, tlAxiom, tlSig :: Parser TopLevel
parseTopLevel = parse topLevelParser "input"
parseProgram :: Text -> Either ParseError ProgramAST -- | 'TopLevel'
parseProgram = parse programParser "input" topLevel = tlOpen <|> tlDef <|> tlData <|> tlAxiom <|> tlSig
-- | 'Open'
tlOpen = label "open statement" $
Open <$> publicOrPrivate
<*> ambiguous
<*> openUsing
<*> openHiding
<*> openRenaming
where
openUsing, openHiding :: Parser [(Text, Maybe (Maybe [Text]))]
openRenaming :: Parser [(Text, Text)]
openUsing = option [] do
keyword "using"
flexibleParens openId
openHiding = option [] do
keyword "hiding"
flexibleParens openId
openId = do
name <- identifier
ctrs <- optionMaybe $ openAll <|> openCtrs
pure (name, ctrs)
openRenaming = option [] do
keyword "renaming"
flexibleParens $ cobetween identifier tkArr identifier
openAll, openCtrs :: Parser (Maybe [Text])
openAll = Nothing <$ keyword ".."
openCtrs = optionMaybe $ flexibleParens identifier
-- | 'Define'
tlDef = Define <$> publicOrPrivate <*> definition
-- | 'TLData'
tlData = TLData <$> data_
-- | 'TLAxiom'
tlAxiom = TLAxiom <$> axiom
-- | 'TLSig'
tlSig = TLSig <$> sig
-- | 'PublicOrPrivate'
publicOrPrivate :: Parser PublicOrPrivate
publicOrPrivate = (Public <$ keyword "pub") <|> pure Private
-- | 'Definition'
definition :: Parser Definition
definition = label "definition" do
name <- identifier
ty <- optionMaybe typeSig
token "="
expr <- ambiguous
pure $ Definition name ty expr
ambiguous, block, finite :: Parser Expr
-- | 'Expr's which are not guaranteed to consume input before recursing.
ambiguous = label "expression" $
exAccess <|> exArrow <|> exAnn <|> exAppI <|> exApp <|> exTypeApp
-- | 'Expr's with a terminal symbol on its left.
block = label "block expression" $
exData <|> exAxiom <|> exForall <|> exTypeLam <|> exBlockLam <|>
exLet <|> exBlockCase <|> finite
-- | 'Expr's with a terminal symbol on both the left /and/ right.
finite = label "finite expression" $
exSig <|> exMod <|> exHole <|> exFiniteCase <|> exFiniteLam <|>
exLit <|> exVar <|> exGroup
exAccess, exArrow, exAnn, exAppI, exApp, exTypeApp,
exData, exAxiom, exForall, exTypeLam, exBlockLam, exFiniteLam, exLet,
exSig, exMod, exHole, exBlockCase, exFiniteCase, exLit, exVar, exGroup
:: Parser Expr
exGroup = label "parenthetical expression" $ betweenParens ambiguous
typeSig, kindSig, modSig :: Parser Type
typeSig = label "type signature" $ keyword ":" *> ambiguous
kindSig = label "kind signature" typeSig
modSig = label "signature" typeSig
-- | 'Data'
exData = Data <$> data_
data_ :: Parser Expr
data_ = label "data expression" do
keyword "data"
ambiguous
-- | 'Axiom'
exAxiom = Axiom <$> axiom
axiom :: Parser Expr
axiom = label "axiom expression" do
keyword "axiom"
ambiguous
-- | 'SigE'
exSig = SigE <$> sig
-- | 'Mod'
exMod = label "module expression" do
keyword "mod"
params <- sigParams
sig <- optionMaybe modSig
body <- flexibleBraces topLevel
pure $ Mod params sig body
-- | 'Access'
exAccess = label "field access" $ try do
expr <- block
tkColCol
name <- identifier
pure $ Access expr name
-- | @∷@
tkColCol :: Parser ()
tkColCol = label "" $ token "" <|> token "::"
-- | 'Forall'
exForall = label "∀ type" $ do
tkForall
uncurry Forall <$> typeBinder
-- | @∀@
tkForall :: Parser ()
tkForall = label "" $ keyword "" <|> keyword "forall"
-- | Everything about @forall@ and @^@ is the same
-- except for the token and whether a type or expr
-- (which are also syntactically the same object)
-- comes next.
--
-- This factors out all of that stuff they have in common.
typeBinder :: Parser (NonEmpty (Either (NonEmpty Text, Kind) Text), Expr)
typeBinder = do
vars <- many1' typeBinder'
tkArr
ty <- ambiguous
pure (vars, ty)
where
typeBinder', kindedBinder, plainBinder
:: Parser (Either (NonEmpty Text, Kind) Text)
typeBinder' = kindedBinder <|> plainBinder
kindedBinder = betweenParens $ do
names <- many1' identifier
kind <- kindSig
pure $ Left (names, kind)
plainBinder = Right <$> identifier
-- | 'Arrow'
exArrow = label "→ type" $ try do
(names, argTy) <- arrowArgs
tkArr
rTy <- ambiguous
pure $ Arrow names argTy rTy
where
arrowArgs, namedArgs, typeArg :: Parser ([Text], Type)
arrowArgs = namedArgs <|> typeArg
namedArgs = betweenParens do
names <- many1 identifier
ty <- kindSig
pure (names, ty)
typeArg = ([],) <$> block
-- | @→@
tkArr :: Parser ()
tkArr = label "" $ keyword "" <|> keyword "->"
-- | 'Ann'
exAnn = label "annotated expression" $ try do
expr <- block
ty <- typeSig
pure $ Ann expr ty
-- | 'Hole'
exHole = label "hole" $ Hole <$ keyword "_"
-- | 'TypeLam'
exTypeLam = label "type Λ" $ do
tkBigLam
uncurry TypeLam <$> typeBinder
-- | @Λ@
tkBigLam :: Parser ()
tkBigLam = label "Λ" $ keyword "Λ" <|> keyword "^"
-- | 'TypeApp'
exTypeApp = label "type application" $ try do
expr <- block
token "@"
arg <- ambiguous
pure $ TypeApp expr arg
-- | 'Lam' with only one undelimited case branch
exBlockLam = label "λ expression" $ try do
args <- many pattern_
tkArr
body <- ambiguous
pure $ LamBlock args body
-- | 'Lam' with delimited case branches
exFiniteLam = label "λ-case expression" $ try do
simpleArgs <- many pattern_
body <- caseBranches
pure $ Lam simpleArgs body
tkLam :: Parser ()
tkLam = label "λ" $ keyword "λ" <|> keyword "\\"
-- | 'App'
exApp = label "application" $ try do
ef <- finite
exs <- many finite
exfinal <- block
pure $ App ef (snoc' exs exfinal)
-- | 'AppI'
exAppI = label "infix application" $
fail "infix expressions not yet supported"
-- | 'Let'
exLet = label "let expression" do
keyword "let"
decls <- flexibleSepBy1 (token ";") topLevel
keyword "in"
body <- ambiguous
pure $ Let decls body
-- | 'Case' with only one undelimited case branch
exBlockCase = label "case expression" $ try do
keyword "case"
exprs <- flexibleSepBy (keyword "|") ambiguous
body <- caseBranches
pure $ Case exprs body
-- | 'Case'
exFiniteCase = label "case-of expression" $ try do
keyword "case"
exprs <- flexibleSepBy1 (keyword "|") ambiguous
keyword "of"
pats <- flexibleSepBy1 (keyword "|") pattern_
tkArr
body <- ambiguous
pure $ CaseBlock exprs pats body
-- | 'Lit'
exLit = label "literal" $ Lit <$> lit ambiguous
-- | 'Var'
exVar = label "variable" $ Var <$> identifier
-- | 'Sig'
sig, sigADT, sigGADT, sigSig :: Parser Sig
sig = label "signature" $ sigADT <|> sigGADT <|> sigSig
sigParams :: Parser [Either (NonEmpty Text, Kind) Text]
sigParams = label "signature parameters" $ many $ annParam <|> unannParam
where
annParam, unannParam :: Parser (Either (NonEmpty Text, Kind) Text)
annParam = betweenParens do
names <- many1' identifier
kind <- kindSig
pure $ Left (names, kind)
unannParam = Right <$> identifier
-- | 'ADT'
sigADT = label "adt" do
keyword "adt"
name <- identifier
params <- sigParams
kind <- optionMaybe kindSig
(codataQ, body) <- betweenBraces $
emptyData <|> emptyCodata <|> dataCtrs <|> codataElims
pure $ SigQs params $ ADT codataQ name kind body
where
emptyData, emptyCodata, dataCtrs, codataElims
:: Parser (Bool, [(Text, [Type])])
emptyData = try $ (False, []) <$ keyword "+"
emptyCodata = try $ (True, []) <$ keyword "&"
dataCtrs = try $ (False,) <$> flexibleSepBy1 (keyword "+") do
name <- identifier
ctrs <- many $ notFollowedBy (keyword "+") >> finite
pure (name, ctrs)
codataElims = try $ (True,) <$> flexibleSepBy1 (keyword "&") do
name <- identifier
elims <- many $ notFollowedBy (keyword "&") >> finite
pure (name, elims)
-- | 'GADT'
sigGADT = label "gadt" do
keyword "gadt"
name <- identifier
params <- sigParams
kind <- optionMaybe kindSig
ctrs <- flexibleBraces declaration
pure $ SigQs params $ GADT name kind ctrs
-- | 'Sig'
sigSig = label "sig" do
keyword "sig"
params <- sigParams
ctrs <- flexibleBraces $ (Left <$> declaration) <|> (Right <$> topLevel)
pure $ SigQs params $ Sig ctrs
declaration :: Parser (NonEmpty Text, Type)
declaration = label "declaration" $ (,) <$> many1' identifier <*> typeSig
pattern_, finitePattern, patVar, patIrr, patImp, patApp,
patAppI, patView, patLit, patGroup :: Parser Pattern
-- | 'Pattern'
pattern_ = label "pattern" $
patApp <|> patAppI <|> patView <|> finitePattern
finitePattern =
patIrr <|> patImp <|> patLit <|> patVar <|> patGroup
patGroup = betweenParens pattern_
-- | 'PatVar'
patVar = label "pattern variable" $ PatVar <$> identifier <*> optionMaybe typeSig
-- | 'Irrelevant'
patIrr = label "irrelevant pattern" $ Irrelevant <$ keyword "_"
-- | 'Impossible'
patImp = label "impossible pattern" $ Impossible <$ keyword "!"
-- | 'PatApp'
patApp = label "pattern application" $ do
name <- identifier
pats <- many1 finitePattern
pure $ PatApp name pats
-- FIXME: Implement infix pattern application
-- | 'PatAppI'
patAppI = label "infix pattern application" $
fail "infix pattern application not yet implemented"
-- | 'View'
patView = label "view pattern" $ try $ View <$> block <*> (tkArr *> pattern_)
-- | 'PatLit'
patLit = label "literal pattern" $ PatLit <$> lit pattern_
caseBranches :: Parser [CaseBranch]
caseBranches = label "case branches" $ flexibleBraces caseBranch
caseBranch :: Parser CaseBranch
caseRefinement, caseGuards, casePlain :: [Pattern] -> Parser CaseBranch
-- | 'CaseBranch'
caseBranch = label "case branch" do
pats <- many1 pattern_
caseRefinement pats <|> caseGuards pats <|> casePlain pats
-- FIXME: Implement case refinement
-- | 'Refinement'
caseRefinement pats = label "case refinement" $
fail "case refinement not implemented"
-- FIXME: Implement case guards
-- | 'Guards'
caseGuards pats = label "guard clauses" $
fail "case guards not yet implemented"
-- | 'PlainCase'
casePlain pats = do
tkArr
PlainCase pats <$> ambiguous
-- FIXME: Implement guard clauses
-- | 'Guard'
guard :: Parser Guard
guard = label "guard clause" $
fail "guard clauses not yet implemented"
lit, litList :: Parser a -> Parser (Lit a)
litInt, litChar, litStr :: Parser (Lit a)
-- | 'Lit'
lit m = label "literal" $ litInt <|> litChar <|> litStr <|> litList m
-- | 'LitInt'
litInt = label "integer literal" $ LitInt . read <$> try do
sign <- optionMaybe $ char '+' <|> char '-'
digits <- many1 digit
pure $ maybe digits (: digits) sign
-- | 'LitChar'
litChar = label "character literal" $
fmap LitChar $ between (char '\'') (char '\'') $ noneOf "\\'\n"
-- | 'LitStr'
litStr = label "string literal" $ fmap (LitStr . T.pack) $
between (char '"') (char '"') $ many1 $ noneOf "\\\"\n"
-- | 'LitList'
litList m = label "list literal" $ LitList <$> flexibleBrackets m
-- | 'FlexibleSeparatorSyntax'
flexibleSepBy :: Parser a -> Parser b -> Parser [b]
flexibleSepBy delim m = do
optional delim
sepEndBy m delim
-- | 'FlexibleSeparatorSyntax'
flexibleSepBy1 :: Parser a -> Parser b -> Parser [b]
flexibleSepBy1 delim m = do
optional delim
sepEndBy1 m delim
flexibleParens, flexibleBrackets, flexibleBraces :: Parser a -> Parser [a]
-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @(@ @)@
flexibleParens = betweenParens . flexibleSepBy (token ";")
-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @[@ @]@
flexibleBrackets = betweenBrackets . flexibleSepBy (token ";")
-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @{@ @}@
flexibleBraces = betweenBraces . flexibleSepBy (token ";")
---
--- Helper parsers which aren't explicitly specified by the AST
---
-- | An identifier is a sequence of characters with no token which is not a keyword.
identifier :: Parser Text
identifier = label "identifier" $ try do
notFollowedBy anyKeyword
name <- T.pack <$> (notFollowedBy anyToken *> many1 anyChar <* spaces)
if any ((`elem` keywords) . T.unpack) $ T.splitOn "_" name
then fail "identifier contained keywords"
else pure name
token :: String -> Parser ()
token tk = label tk $ string tk *> spaces
anyToken :: Parser ()
anyToken = label "any token" $ choice $ map token tokens
-- | Tokens which are reserved by syntax
-- and may not appear /anywhere/ in an identifier.
tokens :: [String]
tokens =
[ "//", "/*", "*/"
, "(", ")", "{", "}"
, ";", "", "::", "@"
, " ", "\r", "\n", "\t"
, "```"
]
-- | A keyword is an exact string which is not part of an identifier.
keyword :: String -> Parser ()
keyword kwd = label kwd $ do
try do
_ <- string kwd
notFollowedBy (notFollowedBy anyToken >> anyChar)
spaces
anyKeyword :: Parser ()
anyKeyword = label "any keyword" $ choice $ map keyword keywords
-- | Keywords are reserved by syntax and are not allowed to be used as identifiers
-- or as a part of a `_`-separated identifier.
--
-- However, they may be used as /part/ of an identifier.
keywords :: [String]
keywords =
[ "open", "using", "hiding", "renaming", "..", "pub"
, "data", "axiom", "adt", "gadt", "sig", "mod"
, "forall"
, "let", "in", "case"
, "[", "]"
, ":", "", "Λ", "^", "λ", "\\", "", "->"
, "|", "?", "!", "_", "..."
]
spaces :: Parser ()
spaces = label "whitespace or comments" $
Text.Parsec.spaces >> optional (try (comment >> spaces))
comment, lineComment, blockComment :: Parser ()
comment = label "comment" $ lineComment <|> blockComment
lineComment = label "line comment" $ do
_ <- try (string "//")
_ <- many1 (noneOf "\n")
pure ()
blockComment = label "block comment" $ do
_ <- try (string "/*")
_ <- many1 $ notFollowedBy (string "*/") >> anyChar
_ <- string "*/"
pure ()
-- | The opposite of `between`: ignore the middle, keep the sides
cobetween :: Parser a -> Parser b -> Parser c -> Parser (a, c)
cobetween a b c = do
x <- a
_ <- b
y <- c
pure (x, y)
-- the parens/brackets/braces word choice is totally meaningless
betweenParens, betweenBrackets, betweenBraces :: Parser a -> Parser a
-- | Between @(@ @)@
betweenParens = between (token "(") (token ")")
-- | Between @[@ @]@
betweenBrackets = between (keyword "[") (keyword "]")
-- | Between @{@ @}@
betweenBraces = between (keyword "{") (keyword "}")
many1' :: Parser a -> Parser (NonEmpty a)
many1' = fmap fromList . many1
snoc' :: [a] -> a -> NonEmpty a
snoc' xs x = fromList $ xs ++ [x]
label :: String -> Parser a -> Parser a
label = flip Text.Parsec.label

View File

@ -1,4 +1,7 @@
module Ivo.Syntax.Printer (unparseAST, unparseType, unparseScheme) where -- | Turn abstract syntax into the corresponding concrete syntax.
--
-- This is /not/ a pretty-printer; it uses minimal whitespace)
module Ivo.Syntax.Printer (unparseScope, unparseTopLevel, unparseExpr) where
import Ivo.Syntax.Base import Ivo.Syntax.Base
@ -10,6 +13,99 @@ import Data.Text.Lazy (fromStrict, toStrict, intercalate, unwords, singleton)
import Data.Text.Lazy.Builder (Builder, fromText, fromLazyText, toLazyText, fromString) import Data.Text.Lazy.Builder (Builder, fromText, fromLazyText, toLazyText, fromString)
import Prelude hiding (unwords) import Prelude hiding (unwords)
unparseScope :: Scope -> Text
unparseScope = unambiguous . scope
unparseTopLevel :: TopLevel -> Text
unparseTopLevel = unambiguous . topLevel
unparseExpr :: Expr -> Text
unparseExpr = unambiguous . expr
type Unparser a = a -> Tagged Builder
scope :: Unparser Scope
scope (Scope tls) = tag Ambiguous $ unmany (topLevel <? ";\n") tls
(<?) :: Unparser a -> Text -> Unparser a
u <? txt = \x -> u x <> fromText txt
topLevel :: Unparser TopLevel
topLevel = \case
Open pub expr use hide rename ->
publicOrPrivate pub <>
unambiguousExpr expr <> " " <>
openUsing use <> " "
openHiding hide <> " "
openRenaming rename
Define pub def ->
publicOrPrivate pub <>
definition def
TLData expr -> data_ expr
TLAxiom expr -> axiom expr
TLSig s -> sig s
publicOrPrivate :: PublicOrPrivate -> Builder
publicOrPrivate Public = "pub "
publicOrPrivate Private = ""
openUsing, openHiding :: [(Text, Maybe (Maybe [Text]))] -> Builder
-- FIXME
openUsing _ = "using (some stuff) "
-- FIXME
openHiding _ = "hiding (some stuff) "
-- FIXME
openRenaming :: [(Text, Text)]
openRenaming _ = "renaming (some stuff) "
definition :: Unparser Definition
definition (Definition name ty expr) =
fromText name <> " " <> typeSig ty <> "= " <> unambiguousExpr expr
expr :: Unparser Expr
expr = \case
Data expr -> tag Block $ data_ expr
Axiom expr -> tag Block $ axiom expr
SigE s -> tag Block $ sig s
-- FIXME
Mod _ _ _ -> tag Block $ error "I don't know how to unparse modules"
Access expr name -> tag Ambiguous $ blockExpr expr <> "::" <> fromText name
Forall binders ty -> tag Block $ "" <> typeBinders binders <> "" <> ambiguousExpr ty
Arrow [] arg ty -> tag Ambiguous $ blockExpr arg <> "" <> ambiguousExpr ty
Ann expr ty -> tag Ambiguous $ blockExpr expr <> ": " <> ambiguousExpr ty
Hole -> tag Finite "_"
TypeLam binders ty -> tag Block $ "Λ " <> typeBinders binders <> "" <> ambiguousExpr ty
TypeApp expr ty -> tag Ambiguous $ blockExpr <> "@" <> finiteExpr ty
Lam pats cases -> tag Block $ "λ " <> unmany1 pattern_ pats <> caseBranches cases
data_ = undefined
axiom = undefined
typeBinders :: Unparser (NonEmpty (Either (NonEmpty Text, Kind) Text))
typeBinders = error "Whatever"
sig :: Unparser Sig
sig = error "I don't know how to unparse signatures"
pattern_ :: Unparser Pattern
pattern_ = error "I don't know how to unparse patterns"
caseBranch :: Unparser CaseBranch
caseBranch = error "I don't know how to unparse case branches"
caseBranches :: Unparser [CaseBranch]
caseBranches = error "I don't know how to unparse case branches"
guard :: Unparser Guard
guard = error "I don't know how to unparse guard clauses"
lit :: Unparser a -> Unparser (Lit a)
lit u = error "I don't know how to unparse literals"
-- I'm surprised this isn't in base somewhere. -- I'm surprised this isn't in base somewhere.
unsnoc :: NonEmpty a -> ([a], a) unsnoc :: NonEmpty a -> ([a], a)
unsnoc = cata \case unsnoc = cata \case
@ -47,78 +143,13 @@ ambiguous :: Tagged Builder -> Builder
ambiguous (Finite, t) = t ambiguous (Finite, t) = t
ambiguous (_, t) = group t ambiguous (_, t) = group t
-- | Turn an abstract syntax tree into the corresponding concrete syntax. ambiguousExpr, blockExpr, finiteExpr :: Expr -> Builder
--
-- This is *not* a pretty-printer; it uses minimal whitespace.
unparseAST :: AST -> Text
unparseAST = toStrict . toLazyText . snd . cata \case
VarF name -> tag Finite $ fromText name
AppF ef exs -> unparseApp ef exs
AbsF names body -> tag Block $
let names' = fromLazyText (unwords $ map fromStrict $ toList names)
in "λ" <> names' <> ". " <> unambiguous body
LetFP defs body -> tag Block $ "let " <> unparseDefs defs <> " in " <> unambiguous body
LetRecFP def body -> tag Block $ "letrec " <> unparseDef def <> " in " <> unambiguous body
CtrF ctr e -> unparseCtr ctr e
CaseF pats ->
let pats' = fromLazyText $ intercalate "; " $ map (toLazyText . unparsePat) pats
in tag Finite $ "{ " <> pats' <> " }"
AnnF () e t -> tag Ambiguous $ final e <> " : " <> fromText (unparseType t)
PNatF n -> tag Finite $ fromString $ show n
PListF es ->
let es' = fromLazyText $ intercalate ", " $ map (toLazyText . unambiguous) es
in tag Finite $ "[" <> es' <> "]"
PStrF s -> tag Finite $ "\"" <> fromText s <> "\""
PCharF c -> tag Finite $ "'" <> fromLazyText (singleton c)
HoleFP -> tag Finite "_"
where
unparseApp :: Tagged Builder -> NonEmpty (Tagged Builder) -> Tagged Builder
unparseApp ef (unsnoc -> (exs, efinal))
= tag Ambiguous $ foldr (\e es' -> ambiguous e <> " " <> es') (final efinal) (ef : exs)
unparseDef (name, val) = fromText name <> " = " <> unambiguous val ambiguousExpr = ambiguous . expr
unparseDefs defs = fromLazyText (intercalate "; " $ map (toLazyText . unparseDef) $ toList defs)
unparseCtr :: Ctr -> [Tagged Builder] -> Tagged Builder blockExpr = final . expr
-- Fully-applied special syntax forms
unparseCtr CPair [x, y] = tag Finite $ "(" <> unambiguous x <> ", " <> unambiguous y <> ")"
unparseCtr CCons [x, y] = tag Finite $ "(" <> unambiguous x <> " :: " <> unambiguous y <> ")"
-- Partially-applied syntax forms
unparseCtr CUnit [] = tag Finite "()"
unparseCtr CPair [] = tag Finite "(,)"
unparseCtr CLeft [] = tag Finite "Left"
unparseCtr CRight [] = tag Finite "Right"
unparseCtr CZero [] = tag Finite "Z"
unparseCtr CSucc [] = tag Finite "S"
unparseCtr CNil [] = tag Finite "[]"
unparseCtr CCons [] = tag Finite "(::)"
unparseCtr CChar [] = tag Finite "Char"
unparseCtr ctr (x:xs) = unparseApp (unparseCtr ctr []) (x :| xs)
unparsePat (Pat ctr ns e) finiteExpr = unambiguous . expr
= unambiguous (unparseCtr ctr (map (tag Finite . fromText) ns)) <> " -> " <> unambiguous e
-- HACK unmany :: Foldable t => Unparser a -> t a -> Builder
pattern TApp2 :: Type -> Type -> Type -> Type unmany u xs = error "fuck it, implement this later"
pattern TApp2 tf tx ty = TApp (TApp tf tx) ty
-- TODO: Improve these printers.
unparseType :: Type -> Text
unparseType (TVar name) = name
unparseType (TApp2 TAbs a b) = "(" <> unparseType a <> " -> " <> unparseType b <> ")"
unparseType (TApp2 TProd a b) = "(" <> unparseType a <> " * " <> unparseType b <> ")"
unparseType (TApp2 TSum a b) = "(" <> unparseType a <> " + " <> unparseType b <> ")"
unparseType (TApp TList a) = "[" <> unparseType a <> "]"
unparseType (TApp a b) = "(" <> unparseType a <> " " <> unparseType b <> ")"
unparseType TAbs = "(->)"
unparseType TProd = "(*)"
unparseType TSum = "(+)"
unparseType TUnit = ""
unparseType TVoid = ""
unparseType TNat = "Nat"
unparseType TList = "[]"
unparseType TChar = "Char"
unparseScheme :: Scheme -> Text
unparseScheme (TForall [] t) = unparseType t
unparseScheme (TForall names t) = "" <> T.unwords names <> ". " <> unparseType t