Make the parser produce a subtly better AST.

This is done by requiring applications to be at least 2 terms,
and making grouping a syntactic feature instead of a feature of applications.

I also made the code a bit more clear by handling whitespace a bit more cleanly;
now, each parser is responsible for its own trailing whitespace,
and whitespace handling is pushed down to the token level
to avoid cluttering high-level definitions with whitespace stuff.

This all also had the effect of improving error messages,
in part due to me labelling many of the parsers.
master
James T. Martin 2020-11-03 15:52:04 -08:00
parent 79e054700b
commit 59a55acdc6
Signed by: james
GPG Key ID: 4B7F3DA9351E577C
4 changed files with 39 additions and 35 deletions

View File

@ -1,4 +1,4 @@
# James Martin's Lambda Calculus # Lambda Calculus
This is a simple implementation of the untyped lambda calculus This is a simple implementation of the untyped lambda calculus
with an emphasis on clear, readable Haskell code. with an emphasis on clear, readable Haskell code.
@ -15,13 +15,15 @@ y y
y y
>> (\x y z. x y) y >> (\x y z. x y) y
λy' z. y y' λy' z. y y'
>> let fix = (\x. x x) \fix f x. f (fix fix f) x; S = \n f x. f (n f x); plus = fix \plus x. x S in plus (\f x. f (f (f x))) (\f x. f (f x)) f x
f (f (f (f (f x))))
>> ^C >> ^C
``` ```
## Notation ## Notation
[Conventional Lambda Calculus notation applies](https://en.wikipedia.org/wiki/Lambda_calculus_definition#Notation), [Conventional Lambda Calculus notation applies](https://en.wikipedia.org/wiki/Lambda_calculus_definition#Notation),
with the exception that variable names are multiple characters long, with the exception that variable names are multiple characters long,
`\` is used in lieu of `λ` to make it easier to type, `\` is permitted in lieu of `λ` to make it easier to type,
and spaces are used to separate variables rather than commas. and spaces are used to separate variables rather than commas.
* Variable names are alphanumeric, beginning with a letter. * Variable names are alphanumeric, beginning with a letter.

View File

@ -13,6 +13,7 @@ extra-source-files:
- README.md - README.md
default-extensions: default-extensions:
- BlockArguments
- ImportQualifiedPost - ImportQualifiedPost
- OverloadedStrings - OverloadedStrings
- ViewPatterns - ViewPatterns

View File

@ -9,64 +9,67 @@ import qualified Data.Text as T
import LambdaCalculus.Expression (Expression, ast2expr) import LambdaCalculus.Expression (Expression, ast2expr)
import qualified LambdaCalculus.Expression as Expr import qualified LambdaCalculus.Expression as Expr
import LambdaCalculus.Parser.AbstractSyntax import LambdaCalculus.Parser.AbstractSyntax
import Text.Parsec hiding (spaces) import Text.Parsec hiding (label, token)
import Text.Parsec.Text import Text.Parsec qualified
import Text.Parsec.Text (Parser)
import TextShow import TextShow
label :: String -> Parser a -> Parser a
label = flip Text.Parsec.label
token :: Char -> Parser ()
token ch = label [ch] $ char ch *> spaces
keywords :: [Text] keywords :: [Text]
keywords = ["let", "in"] keywords = ["let", "in"]
-- | A keyword is an exact string which is not part of an identifier. -- | A keyword is an exact string which is not part of an identifier.
keyword :: Text -> Parser () keyword :: Text -> Parser ()
keyword kwd = do keyword kwd = label (T.unpack kwd) $ do
void $ string (T.unpack kwd) try do
notFollowedBy letter string $ T.unpack kwd
notFollowedBy letter
spaces
-- | An identifier is a sequence of letters which is not a keyword. -- | An identifier is a sequence of letters which is not a keyword.
identifier :: Parser Identifier identifier :: Parser Identifier
identifier = do identifier = label "identifier" $ do
notFollowedBy anyKeyword notFollowedBy anyKeyword
T.pack <$> many1 letter T.pack <$> (many1 letter <* spaces)
where anyKeyword = choice $ map (try . keyword) keywords where anyKeyword = choice $ map keyword keywords
variable :: Parser AbstractSyntax variable :: Parser AbstractSyntax
variable = Variable <$> identifier variable = label "variable" $ Variable <$> identifier
spaces :: Parser () many2 :: Parser a -> Parser [a]
spaces = skipMany1 space many2 p = (:) <$> p <*> many1 p
grouping :: Parser AbstractSyntax
grouping = label "grouping" $ between (token '(') (token ')') expression
application :: Parser AbstractSyntax application :: Parser AbstractSyntax
application = Application <$> sepEndBy1 applicationTerm spaces application = Application <$> many2 applicationTerm
where applicationTerm :: Parser AbstractSyntax where applicationTerm :: Parser AbstractSyntax
applicationTerm = abstraction <|> grouping <|> let_ <|> variable applicationTerm = abstraction <|> let_ <|> grouping <|> variable
where grouping :: Parser AbstractSyntax
grouping = between (char '(') (char ')') expression
abstraction :: Parser AbstractSyntax abstraction :: Parser AbstractSyntax
abstraction = do abstraction = label "lambda abstraction" $ Abstraction <$> between lambda (token '.') (many1 identifier) <*> expression
char '\\' <|> char 'λ' ; optional spaces where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces
names <- sepEndBy1 identifier spaces
char '.'
Abstraction names <$> expression
let_ :: Parser AbstractSyntax let_ :: Parser AbstractSyntax
let_ = do let_ = Let <$> between (keyword "let") (keyword "in") definitions <*> expression
try (keyword "let") ; spaces where definitions :: Parser [Definition]
defs <- sepBy1 definition (char ';' *> optional spaces) definitions = flip sepBy1 (token ';') do
keyword "in" name <- identifier
Let defs <$> expression token '='
where definition :: Parser Definition
definition = do
name <- identifier ; optional spaces
char '='
value <- expression value <- expression
pure (name, value) pure (name, value)
expression :: Parser AbstractSyntax expression :: Parser AbstractSyntax
expression = optional spaces *> (abstraction <|> let_ <|> application <|> variable) <* optional spaces expression = label "expression" $ abstraction <|> let_ <|> try application <|> grouping <|> variable
parseAST :: Text -> Either ParseError AbstractSyntax parseAST :: Text -> Either ParseError AbstractSyntax
parseAST = parse (expression <* eof) "input" parseAST = parse (spaces *> expression <* eof) "input"
parseExpression :: Text -> Either ParseError Expression parseExpression :: Text -> Either ParseError Expression
parseExpression = fmap ast2expr . parseAST parseExpression = fmap ast2expr . parseAST

View File

@ -55,8 +55,6 @@ main = defaultMain $
, testGroup "Parser tests" , testGroup "Parser tests"
[ testGroup "Unit tests" [ testGroup "Unit tests"
[ testCase "identity" $ parseExpression "\\x.x" @?= Right (Abstraction "x" $ Variable "x") [ testCase "identity" $ parseExpression "\\x.x" @?= Right (Abstraction "x" $ Variable "x")
-- This syntax is forbidden because it interacts poorly with other syntax, e.g. `let x=in` becoming a valid program.
--, testCase "nullary application" $ parseExpression "()" @?= Right (Abstraction "x" $ Variable "x")
, testCase "unary application" $ parseExpression "(x)" @?= Right (Variable "x") , testCase "unary application" $ parseExpression "(x)" @?= Right (Variable "x")
, testCase "application shorthand" $ parseExpression "a b c d" @?= Right (Application (Application (Application (Variable "a") (Variable "b")) (Variable "c")) (Variable "d")) , testCase "application shorthand" $ parseExpression "a b c d" @?= Right (Application (Application (Application (Variable "a") (Variable "b")) (Variable "c")) (Variable "d"))
, testCase "let" $ parseExpression "let x = \\y.y in x" @?= Right (Application (Abstraction "x" (Variable "x")) (Abstraction "y" (Variable "y"))) , testCase "let" $ parseExpression "let x = \\y.y in x" @?= Right (Application (Abstraction "x" (Variable "x")) (Abstraction "y" (Variable "y")))