From ec7ad2236b42a8b47d3e4c76fbc3b281c6a1e091 Mon Sep 17 00:00:00 2001
From: James Martin <james@jtmar.me>
Date: Wed, 21 Aug 2019 15:18:25 -0700
Subject: [PATCH] Added `let`, removed `Nil`, replaced `subst` with `Subst`
 exprs.

---
 README.md                           | 63 +++++++++++------------------
 src/UntypedLambdaCalculus.hs        | 63 ++++++++++++++---------------
 src/UntypedLambdaCalculus/Parser.hs | 31 ++++++++++----
 3 files changed, 77 insertions(+), 80 deletions(-)

diff --git a/README.md b/README.md
index 9bd4f0d..82c644c 100644
--- a/README.md
+++ b/README.md
@@ -14,51 +14,34 @@ Exit the prompt with `Ctrl-C` (or however else you kill a program in your termin
 Bound variables will be printed followed by a number representing the number of binders
 between it and its definition for disambiguation.
 
-## Differences from the traditional lambda calculus
-This version of the lambda calculus is completely compatible
-with the traditional lambda calculus, but features a few extensions.
-
-### Nullary applications
-In any binary application `(f x)`, the `f` is a function and the `x` is a variable.
-Applications are left-associative, meaning `(f x y)` equals `((f x) y)`.
-Any term `x` may be expanded to an application `(id x)`.
-Working backwards, we have `(f x y)` equals `(((id f) x) y)`.
-Thus we may reasonably say that `() = id`
-and thus that `(f x y)` equals `(((() f) x) y)`,
-and that `(x)` equals `(() x)`, which reduces to just `x`.
-
-### Nullary functions and generalized eta reductions
-We can apply a similar argument to the function syntax.
-`(\x y z. E)` is the same as `(\x.(\y.(\z.E)))` because lambda is right-associative.
-Any term `x` may be eta-expanded into a lambda `(\y. ((() x) y))`.
-Working backwards, we have `(\x. (() x))` eta-reducing to `()`.
-Therefore, the identity function eta-reduces to just `()`.
-
-Again similarly we have the nulladic lambda syntax `(\.E)`
-which trivially beta-reduces to `E`.
-
-I also take any series of ordered applications
-`(\v v1 v2 ... vn. v:n v2:(n-1) ... v(n-1):1 vn:0)`
-to eta-reduce to `()` (including `()` itself and, trivially, `(\x. x)`).
-
-### Nullary functions
-
-### Examples
+### Example session
 ```
->> (\D F I. D (F I)) (\x. x x) (\f. f (f y)) (\x. x)
+>> let D = (\x. x x) in let F = (\f. f (f y)) in D (F ())
 y y
->> (\T f x. T (T (T (T T))) f x) (\f x. f (f x)) (\x. x) y
+>> let T = (\f x. f (f x)) in (\f x. T (T (T (T T))) f x) () y
 y
 >> \x. \y. y x
 \x. \y. y:0 x:1
+>> ^C
 ```
 
-## Syntax
-* Variables are alphanumeric, beginning with a letter.
-* Applications are left-associative, and parentheses are not necessary.
-* Lambdas are represented by `\`, bind as far right as possible, and parentheses are not necessary.
+## Notation
+[Conventional Lambda Calculus notation applies](https://en.wikipedia.org/wiki/Lambda_calculus_definition#Notation),
+with the exception that variable names are mmultiple characters long,
+and `\` is used in lieu of `λ` for convenience.
 
-### Examples
-* Variables: `x`, `abc`, `D`, `fooBar`, `f10`
-* Applications: `(\x. x x) y`, `a b`, `((g f) y)`
-* Lambdas: `\x. N`, `\x y. y`, `(\x. f x)`
\ No newline at end of file
+* Variable names are alphanumeric, beginning with a letter.
+* Outermost parentheses may be dropped: `M N` is equivalent to `(M N)`.
+* Applications are left-associative: `M N P` may be written instead of `((M N) P)`.
+* The body of an abstraction extends as far right as possible: `\x. M N` means `\x.(M N)` and not ``(\x. M) N`.
+* A sequence of abstractions may be contracted: `\foo. \bar. \baz. N` may be abbreviated as `\foo bar baz. N`.
+* Variables may be bound using let expressions: `let x = N in M` abbreviates `(\x. N) M`.
+
+### Violations of convention
+* I use spaces to separate variables in abstractions instead of commas because I think it looks better.
+
+### Additional extensions to notation
+Since `\x. x` is the left identity of applications and application syntax is left-associative,
+I (syntactically) permit unary and nullary applications so that `()` is `\x. x`, and `(x)` is `x`.
+
+On the same principle, the syntax of a lambda of no variables `\. e` is `e`.
diff --git a/src/UntypedLambdaCalculus.hs b/src/UntypedLambdaCalculus.hs
index 9a1478d..29951bf 100644
--- a/src/UntypedLambdaCalculus.hs
+++ b/src/UntypedLambdaCalculus.hs
@@ -1,9 +1,8 @@
 {-# LANGUAGE TemplateHaskell, TypeFamilies, DeriveFunctor, DeriveFoldable, DeriveTraversable, MultiWayIf #-}
-module UntypedLambdaCalculus (Expr (Free, Var, Lam, App, Nil), ReaderAlg, eval, cataReader) where
+module UntypedLambdaCalculus (Expr (Free, Var, Lam, App), ReaderAlg, eval, cataReader) where
 
-import Control.Monad.Reader (Reader, runReader, local, reader, ask)
+import Control.Monad.Reader (Reader, runReader, local, reader)
 import Data.Foldable (fold)
-import Data.Functor ((<&>))
 import Data.Functor.Foldable (Base, Recursive, cata, embed, project)
 import Data.Functor.Foldable.TH (makeBaseFunctor)
 import Data.Monoid (Any (Any, getAny))
@@ -14,7 +13,7 @@ data Expr = Free String
           | Var Int
           | Lam String Expr
           | App Expr Expr
-          | Nil
+          | Subst Int Expr Expr
 
 makeBaseFunctor ''Expr
 
@@ -36,7 +35,10 @@ instance Show Expr where
             f <- f'
             x <- x'
             return $ "(" ++ f ++ " " ++ x ++ ")"
-          alg NilF = return "()"
+          alg (SubstF index val' body') = do
+            body <- local ("SUBSTVAR" :) body'
+            val <- val'
+            return $ body ++ "[ " ++ show index ++ " := " ++ val ++ " ]"
 
 -- | Is the innermost bound variable of this subexpression (`Var 0`) used in its body?
 -- | For example: in `\x. a:1 x:0 b:2`, `x:0` is bound in `a:1 x:0 b:2`.
@@ -61,41 +63,36 @@ liftExpr n (Var i)     = Var $ i + n
 liftExpr _ o@(Lam _ _) = o
 liftExpr n x           = embed $ fmap (liftExpr n) $ project x
 
-subst :: Expr -> Expr -> Expr
-subst val = cataReader alg 0
-  where alg :: ReaderAlg ExprF Int Expr
-        alg (VarF i)   = ask <&> \bindingDepth -> if
-          | i == bindingDepth -> liftExpr bindingDepth val
-          | i >  bindingDepth -> Var $ i - 1
-          | otherwise -> Var i
-        alg (LamF v e) = Lam v <$> local (+ 1) e
-        alg x = embed <$> sequence x
+substitute :: Int -> Expr -> Expr -> Expr
+substitute index val v@(Var index')
+  | index == index' = val
+  | index <  index' = Var $ index' - 1
+  | otherwise       = v
+substitute index val (Lam name body) = Lam name $ Subst (index + 1) (liftExpr 1 val) body
+substitute index val (Subst index2 val2 body) =
+  substitute index val $ substitute index2 val2 body
+substitute index val x = embed $ fmap (Subst index val) $ project x
 
--- | Generalized eta reduction. I (almost certainly re-)invented it myself.
-etaReduce :: Expr -> Expr
--- Degenerate case
--- The identity function reduces to the syntactic identity, `Nil`.
-etaReduce (Lam _ (Var 0)) = Nil
+etaReduce :: String -> Expr -> Expr
 -- `\x. f x -> f` if `x` is not bound in `f`.
-etaReduce o@(Lam _ (App f (Var 0)))
-  | unbound f = eval $ subst undefined f
-  | otherwise = o
+etaReduce name o@(App f (Var 0))
+  | unbound f = eval $ Subst 0 undefined f
+  | otherwise = Lam name $ o
 -- `\x y. f y -> \x. f` if `y` is not bound in `f`;
 -- the resultant term may itself be eta-reducible.
-etaReduce (Lam v e'@(Lam _ _)) = case etaReduce e' of
-  e@(Lam _ _) -> Lam v e
-  e -> etaReduce $ Lam v e
-etaReduce x = x
+etaReduce name (Lam name' body') = case etaReduce name' body' of
+  body@(Lam _ _) -> Lam name body
+  body -> etaReduce name body
+etaReduce name body = Lam name body
 
-betaReduce :: Expr -> Expr
-betaReduce (App f' x) = case eval f' of
-  Lam _ e -> eval $ subst x e
-  Nil -> eval x
+betaReduce :: Expr -> Expr -> Expr
+betaReduce f' x = case eval f' of
+  Lam _ e -> eval $ Subst 0 x e
   f -> App f $ eval x
-betaReduce x = x
 
 -- | Evaluate an expression to normal form.
 eval :: Expr -> Expr
-eval a@(App _ _) = betaReduce a
-eval l@(Lam _ _) = etaReduce l
+eval (Subst index val body) = eval $ substitute index val body
+eval (App f x)              = betaReduce f x
+eval (Lam name body)        = etaReduce name body
 eval o = o
diff --git a/src/UntypedLambdaCalculus/Parser.hs b/src/UntypedLambdaCalculus/Parser.hs
index e8a5eb2..e2498da 100644
--- a/src/UntypedLambdaCalculus/Parser.hs
+++ b/src/UntypedLambdaCalculus/Parser.hs
@@ -5,13 +5,14 @@ import Control.Applicative (liftA2)
 import Control.Monad.Reader (local, asks)
 import Data.List (foldl', elemIndex)
 import Data.Functor.Foldable.TH (makeBaseFunctor)
-import Text.Parsec (SourceName, ParseError, (<|>), many, sepBy, letter, alphaNum, char, between, spaces, parse)
+import Text.Parsec (SourceName, ParseError, (<|>), many, sepBy, letter, alphaNum, char, between, spaces, parse, string)
 import Text.Parsec.String (Parser)
-import UntypedLambdaCalculus (Expr (Free, Var, Lam, App, Nil), ReaderAlg, cataReader)
+import UntypedLambdaCalculus (Expr (Free, Var, Lam, App), ReaderAlg, cataReader)
 
 data Ast = AstVar String
          | AstLam [String] Ast
          | AstApp [Ast]
+         | AstLet String Ast Ast
 
 makeBaseFunctor ''Ast
 
@@ -37,13 +38,27 @@ lam = do
 
 -- | An application expression.
 app :: Parser Ast
-app = AstApp <$> safeExpr `sepBy` spaces
+app = AstApp <$> consumesInput `sepBy` spaces
+
+let_ :: Parser Ast
+let_ = do
+  string "let "
+  bound <- name
+  string " = "
+  -- we can't allow raw `app` or `lam` here
+  -- because they will consume the `in` as a variable.
+  val <- let_ <|> var <|> parens app
+  char ' '
+  spaces
+  string "in "
+  body <- app
+  return $ AstLet bound val body
 
 -- | An expression, but where applications must be surrounded by parentheses,
 -- | to avoid ambiguity (infinite recursion on `app` in the case where the first
 -- | expression in the application is also an `app`, consuming no input).
-safeExpr :: Parser Ast
-safeExpr = var <|> lam <|> parens (lam <|> app)
+consumesInput :: Parser Ast
+consumesInput = let_ <|> var <|> lam <|> parens app
 
 toExpr :: Ast -> Expr
 toExpr = cataReader alg []
@@ -55,8 +70,10 @@ toExpr = cataReader alg []
         Just index -> Var index
         Nothing    -> Free varName
     alg (AstLamF vars body) = foldr (\v e -> Lam v <$> local (v :) e) body vars
-    alg (AstAppF [e]) = e
-    alg (AstAppF es) = foldl' App Nil <$> sequenceA es
+    alg (AstAppF es) = foldl' App (Lam "x" (Var 0)) <$> sequenceA es
+    alg (AstLetF var val body) = do
+      body' <- local (var :) body
+      App (Lam var body') <$> val
 
 -- | Since applications do not require parentheses and can contain only a single item,
 -- | the `app` parser is sufficient to parse any expression at all.