Modified to use dependent types and 'Drop' instead of a var index.

2019-08-23 18:38:57 -07:00 · 2019-08-23 18:38:57 -07:00 · 7cb27e8e9a
parent ee7794073c
commit 7cb27e8e9a
6 changed files with 309 additions and 114 deletions
--- a/app/Main.hs
+++ b/app/Main.hs
@ -1,4 +1,3 @@
-{-# LANGUAGE BlockArguments, LambdaCase #-}
 module Main where

 import Control.Monad (forever)
@ -15,4 +14,4 @@ prompt text = do
 main :: IO ()
 main = forever $ parseExpr "stdin" <$> prompt ">> " >>= \case
  Left parseError -> putStrLn $ "Parse error: " ++ show parseError
-  Right expr -> print $ eval expr
+  Right expr -> do print expr; print $ eval expr
--- a/package.yaml
+++ b/package.yaml
@ -12,11 +12,28 @@ description:         Please see the README on GitHub at <https://github.com/jame
 extra-source-files:
 - README.md

+default-extensions:
+- BlockArguments
+- DataKinds
+- DeriveFoldable
+- DeriveFunctor
+- DeriveTraversable
+- FlexibleInstances
+- FunctionalDependencies
+- GADTs
+- KindSignatures
+- LambdaCase
+- MultiParamTypeClasses
+- PolyKinds
+- Rank2Types
+- TemplateHaskell
+- TypeFamilies
+- TypeOperators
+
 dependencies:
 - base >= 4.7 && < 5
 - mtl >= 2.2 && < 3
 - parsec >= 3.1 && < 4
- recursion-schemes >= 5.1 && < 6

 library:
  source-dirs: src
--- a/src/Data/Type/Nat.hs
+++ b/src/Data/Type/Nat.hs
@ -0,0 +1,54 @@
+-- | I would rather depend on the `fin` package than export my own Nat type,
+-- but I couldn't figure out how to write substitution with their definition of SNat,
+-- because it uses `SNatI n =>` instead of `SNat n ->` in the recursive case,
+-- and `withSNat` and `snat` were both providing ambigous type variables and all that.
+-- If anyone could fix it for me, I would gladly accept a PR.
+module Data.Type.Nat where
+
+import Data.Type.Equality ((:~:)(Refl))
+import Numeric.Natural (Natural)
+
+data Nat = Z | S Nat
+
+instance Show Nat where
+  show = show . natToNatural
+
+data SNat :: Nat -> * where
+  SZ :: SNat 'Z
+  SS :: SNat n -> SNat ('S n)
+
+instance Show (SNat n) where
+  show = show . snatToNatural
+
+type family Plus (n :: Nat) (m :: Nat) = (sum :: Nat) where
+  Plus Z      m = m
+  Plus ('S n) m = 'S (Plus n m)
+
+class               SNatI (n :: Nat) where snat :: SNat n
+instance            SNatI 'Z         where snat = SZ
+instance SNatI n => SNatI ('S n)     where snat = SS snat
+
+natToNatural :: Nat -> Natural
+natToNatural Z     = 0
+natToNatural (S n) = 1 + natToNatural n
+
+snatToNat :: forall n. SNat n -> Nat
+snatToNat SZ     = Z
+snatToNat (SS n) = S $ snatToNat n
+
+snatToNatural :: forall n. SNat n -> Natural
+snatToNatural = natToNatural . snatToNat
+
+plusZero :: SNat n -> Plus n 'Z :~: n
+-- trivial: Z + n = n by definition of equality,
+-- and in this case n = Z and thus Z + Z = Z.
+plusZero SZ     = Refl
+-- if n + Z = n, then S n + Z = S n.
+plusZero (SS n) = case plusZero n of Refl -> Refl
+
+plusSuc :: SNat n -> SNat m -> Plus n ('S m) :~: 'S (Plus n m)
+-- trivial: Z + n = n by definition of equality,
+-- and in this case n = S m, and thus Z + S m = S m.
+plusSuc SZ     _ = Refl
+-- if n + S m = S (n + m), then S n + S m = S (S n + m).
+plusSuc (SS n) m = case plusSuc n m of Refl -> Refl
--- a/src/Data/Vec.hs
+++ b/src/Data/Vec.hs
@ -0,0 +1,8 @@
+module Data.Vec where
+
+import Data.Type.Nat
+
+-- | See the documentation of 'Data.Type.Nat' to see why I don't just depend on the `vec` package.
+data Vec :: Nat -> * -> * where
+  VNil  :: Vec 'Z a
+  (:::) :: a -> Vec n a -> Vec ('S n) a
--- a/src/UntypedLambdaCalculus.hs
+++ b/src/UntypedLambdaCalculus.hs
@ -1,97 +1,206 @@
-{-# LANGUAGE TemplateHaskell, TypeFamilies, DeriveFunctor, DeriveFoldable, DeriveTraversable, MultiWayIf, LambdaCase, BlockArguments #-}
-module UntypedLambdaCalculus (Expr (Free, Var, Lam, App), ReaderAlg, eval, cataReader) where
+module UntypedLambdaCalculus where

-import Control.Monad.Reader (Reader, runReader, local, reader, ask, asks)
-import Control.Monad.Writer (Writer, runWriter, listen, tell)
-import Data.Foldable (fold)
-import Data.Functor ((<&>))
-import Data.Functor.Foldable (Base, Recursive, cata, embed, project)
-import Data.Functor.Foldable.TH (makeBaseFunctor)
-import Data.Monoid (Any (Any, getAny))
+import Control.Applicative ((<|>))
+import Data.Type.Nat (Nat (Z, S), SNat (SZ, SS), SNatI, Plus, snat)

-- | An expression using the Reverse De Bruijn representation.
-- | Like De Bruijn representation, variables are named according to their binding site.
-- | However, instead of being named by the number of binders between variable and its binder,
-- | here variables are represented by the distance between their binder and the top level.
-data Expr = Free String
-          -- Var index is bound `index` in from the outermost binder.
-          -- The outermost binder's name is the last element of the list.
-          | Var Int
-          -- This lambda is `index` bindings away from the outermost binding.
-          -- If the index is `0`, then this is the outermost binder.
-          | Lam String Int Expr
-          | App Expr Expr
+-- | Expressions are parametrized by the depth of the variable bindings they may access.
+-- An expression in which no variables are bound (a closed expression) is represented by `Expr 'Z`.
+data Expr :: Nat -> * where
+  -- | The body of a lambda abstraction may reference all of the variables
+  -- bound in its parent, in addition to a new variable bound by the abstraction.
+  Lam :: Expr ('S n) -> Expr n
+  -- | On the other hand, any sub-expression may choose to simply ignore
+  -- the variable bound by the lambda expression,
+  -- only referencing the variables bound in its parent instead.
+  --
+  -- For example, in the constant function `\x. \y. x`,
+  -- although the innermost expression *may* access the innermost binding (`y`),
+  -- it instead only accesses the outer one, `x`.
+  -- Thus the body of the expression would be `Drop Var`.
+  --
+  -- Given the lack of any convention for how to write 'Drop',
+  -- I have chosen to write it as `?x` where `x` is the body of the drop.
+  Drop :: Expr n -> Expr ('S n)
+  -- | For this reason (see 'Drop'),
+  -- variables only need to access the innermost accessible binding.
+  -- To access outer bindings, you must first 'Drop' all of the bindings
+  -- in between the variable and the desired binding to access.
+  Var :: Expr ('S n)
+  -- | Function application. The left side is the function, and the right side is the argument.
+  App :: Expr n -> Expr n -> Expr n
+  -- | A free expression is a symbolic placeholder which reduces to itself.
+  Free :: String -> Expr 'Z

-makeBaseFunctor ''Expr
+instance SNatI n => Show (Expr n) where
+  show expr = show' snat expr
+    where show' :: SNat n -> Expr n -> String
+          show' (SS n) Var         = show n
+          show' SZ     (Free name) = name
+          show' (SS n) (Drop body) = '?' : show' n body
+          show' n      (Lam body)  = "(\\" ++ show n ++ " " ++ show' (SS n) body ++ ")"
+          show' n      (App fe xe) = "(" ++ show' n fe ++ " " ++ show' n xe ++ ")"

-type Algebra   f   a = f a ->          a
-type ReaderAlg f s a = Algebra f (Reader s a)
+-- | The meaning of expressions is defined by how they can be reduced.
+-- There are three kinds of reduction: beta-reduction ('betaReduce'),
+-- which defines how applications interact with lambda abstractions;
+-- eta-reduction ('etaReduce'), which describes how lambda abstractions interact with applications;
+-- and a new form, which I call scope-reduction ('scopeReduce'),
+-- which describes how 'Drop' scope delimiters propogate through expressions.
+--
+-- This function takes an expression and either reduces it,
+-- or, if there is no applicable reduction rule, returns nothing.
+-- The lack of an applicable reduction rule does not necessarily mean
+-- that an expression is irreducible: see 'evaluate' for more information.
+reduce :: Expr n -> Maybe (Expr n)
+-- Note: there are no expressions which are reducible in multiple ways.
+-- Only one of these cases can apply at once.
+reduce expr = scopeReduce expr <|> betaReduce expr <|> etaReduce expr

-cataReader :: Recursive r => ReaderAlg (Base r) s a -> s -> r -> a
-cataReader f initialState x = runReader (cata f x) initialState
+-- | A subexpression to which a reduction step may be applied is called a "redex",
+-- short for "reducible expression".
+isRedex :: Expr n -> Bool
+isRedex expr = isScopeRedex expr || isBetaRedex expr || isEtaRedex expr

-indexOr :: a -> Int -> [a] -> a
-indexOr def index xs
-  | index < length xs && index >= 0 = xs !! index
-  | otherwise = def
+-- | Beta reduction describes how functions behave when applied to arguments.
+-- To reduce a function application, you simply 'substitute` the parameter into the function body.
+--
+-- Beta reduction is the fundamental computation step of the lambda calculus.
+-- Only this rule is necessary for the lambda calculus to be turing-complete;
+-- the other reductions merely define *equivalences* between expressions,
+-- so that every expression has a canonical form.
+betaReduce :: Expr n -> Maybe (Expr n)
+betaReduce (App (Lam fe) xe) = Just $ substitute xe fe
+-- (^) Aside: 'App' represents function application in the lambda calculus.
+-- Haskell convention would be to name the function `f` and the argument `x`,
+-- but that often collides with Haskell functions and arguments,
+-- so instead I will be calling them `fe` and `xe`,
+-- where the `e` stands for "expression".
+betaReduce  _                = Nothing

-instance Show Expr where
-  show = flip cataReader [] \case
-    FreeF name -> return name
-    VarF index -> asks (\boundNames -> indexOr "" (length boundNames - index - 1) boundNames)
-      <&> \name -> name ++ ":" ++ show index
-    LamF name index body' -> local (name :) body' <&> \body ->
-      "(\\" ++ name ++ ":" ++ show index ++ ". " ++ body ++ ")"
-    AppF f' x' -> f' >>= \f -> x' <&> \x ->
-      "(" ++ f ++ " " ++ x ++ ")"
-    
-eval :: Expr -> Expr
-eval x = case reduce innerReduced of
-    Just expr -> eval expr
-    Nothing -> x
-  where innerReduced = embed $ fmap eval $ project x
+-- TODO: Document this.
+substitute :: Expr n -> Expr ('S n) -> Expr n
+substitute = substitute' SZ
+  where substitute' :: SNat n -> Expr m -> Expr ('S (Plus n m)) -> Expr (Plus n m)
+        substitute' SZ     x Var         = x
+        substitute' (SS _) _ Var         = Var
+        substitute' SZ     x (Drop body) = body
+        substitute' (SS n) x (Drop body) = Drop $ substitute' n x body
+        substitute' n      x (App fe xe) = App (substitute' n x fe) (substitute' n x xe)
+        substitute' n      x (Lam body)  = Lam $ substitute' (SS n) x body

-reduce :: Expr -> Maybe Expr
-reduce (Lam name index body) = etaReduce name index body
-reduce (App f x)             = betaReduce f x
-reduce _                     = Nothing
+-- | A predicate determining whether a subexpression would allow a beta-reduction step.
+isBetaRedex :: Expr n -> Bool
+isBetaRedex (App (Lam _) _) = True
+isBetaRedex _               = False

-betaReduce :: Expr -> Expr -> Maybe Expr
-betaReduce (Lam name index body) x = Just $ subst index x body
-betaReduce _                     _ = Nothing
+-- | For any expression `f`, `f` is equivalent to `\x. ?f x`, a property called "eta-equivalence".
+-- The conversion between these two forms is called "eta-conversion",
+-- with the conversion from `f` to `\x. ?f x` being called "eta-expansion",
+-- and its inverse (from `\x. ?f x` to `f`) being called "eta-reduction".
+--
+-- This rule is not necessary for the lambda calculus to express computation;
+-- that's the purpose of 'betaReduce'; rather, it expresses the idea of "extensionality",
+-- which in general describes the principles that judge objects to be equal
+-- if they have the same external properties
+-- (from within the context of the logical system, i.e. without regard to representation).
+-- In the context of functions, this would mean that two functions are equal
+-- if and only if they give the same result for all arguments.
+etaReduce :: Expr n -> Maybe (Expr n)
+etaReduce (Lam (App (Drop fe) Var)) = Just fe
+etaReduce _                         = Nothing

-etaReduce :: String -> Int -> Expr -> Maybe Expr
-etaReduce name index body@(App f (Var index'))
-  -- If the variable bound by this lambda is only used in the right hand of the outermost app,
-  -- then we may delete this function. The absolute position of all binding terms inside
-  -- this one has been decreased by the removal of this lambda, and must be renumbered.
-  | index == index' && unbound index f = Just $ subst index undefined f
-  | otherwise = Nothing
-etaReduce _ _ _ = Nothing
+-- | A predicate determining whether a subexpression would allow an eta-reduction step.
+isEtaRedex :: Expr n -> Bool
+isEtaRedex (Lam (App (Drop _) Var )) = True
+isEtaRedex _                         = False

-unbound :: Int -> Expr -> Bool
-unbound index = not . bound index
+-- | Eta-expansion, the inverse of 'etaReduce'.
+etaExpand :: Expr n -> Expr n
+etaExpand fe = Lam $ App (Drop fe) Var

-bound :: Int -> Expr -> Bool
-bound index = getAny . cata \case
-  VarF index' -> Any $ index == index'
-  expr -> fold expr
+-- TODO: Scope conversion isn't a real conversion relationship!
+-- 'scopeExpand' can only be applied a finite number of times.
+-- That doesn't break the code, but I don't want to misrepresent it.
+-- 'scopeExpand' is only the *left* inverse of 'scopeReduce',
+-- not the inverse overall.
+--
+-- Alternatively, 'scopeExpand' could be defined on expressions with no sub-expressions.
+-- That would make sense, but then 'scopeReduce' would also have to be defined like that,
+-- which would make every reduction valid as well, meaning we can't use it in the same way
+-- we use the other reduction, because it always succeeds, and thus every expression
+-- could be considered reducible.
+--
+-- Perhaps delimiting scope should be external to the notion of an expression,
+-- like how Thyer represented it in the "Lazy Specialization" paper
+-- (http://thyer.name/phd-thesis/thesis-thyer.pdf).
+--
+-- In addition, it really doesn't fit in with the current scheme of reductions.
+-- It doesn't apply to any particular constructor/eliminator relationship,
+-- instead being this bizarre syntactical fragment instead of a real expression.
+-- After all, I could have also chosen to represent the calculus
+-- so that variables are parameterized by `Fin n` instead of being wrapped in stacks of 'Drop'.
+--
+-- I think this problem will work itself out as I work further on evaluation strategies
+-- and alternative representations, but for now, it'll do.
+-- | Scope-conversion describes how 'Drop'-delimited scopes propgate through expressions.
+-- It expresses the idea that a variable is used in an expression
+-- if and only if it is used in at least one of its sub-expressions.
+--
+-- Similarly to 'etaReduce', there is also define an inverse function, 'scopeExpand'.
+scopeReduce :: Expr n -> Maybe (Expr n)
+scopeReduce (App (Drop fe) (Drop xe)) = Just $ Drop $ App fe xe
+-- TODO: I feel like there's a more elegant way to represent this.
+-- It feels like `Lam (Drop body)` should be its own atomic unit.
+-- Maybe I could consider a combinator-based representation,
+-- where `Lam (Drop body)` is just the `K` combinator `K body`?
+scopeReduce (Lam (Drop (Drop body)))  = Just $ Drop $ Lam $ Drop body
+scopeReduce _                         = Nothing

-embedExpr :: Int -> Expr -> Expr
-embedExpr index (Free name) = Free name
-embedExpr index (Var index')
-  | index' >= index = Var $ index' + 1
-  | otherwise = Var index'
-embedExpr index (App f x) = App (embedExpr index f) (embedExpr index x)
-embedExpr index (Lam name index' body) = Lam name (index' + 1) $ embedExpr index body
+-- | A predicate determining whether a subexpression would allow a scope-reduction step.
+isScopeRedex :: Expr n -> Bool
+isScopeRedex (App (Drop _) (Drop _)) = True
+isScopeRedex (Lam (Drop (Drop _)))   = True
+isScopeRedex _                       = False

-subst :: Int -> Expr -> Expr -> Expr
-subst index val (Free name) = Free name
-subst index val (Var index')
-  | index == index' = val
-  -- There is now one fewer binding site between the innermost binding site and `index`,
-  -- thus if the binding site is further in than ours, it must be decremented.
-  | index <  index' = Var $ index' - 1
-  | otherwise = Var index'
-subst index val (App f x) = App (subst index val f) (subst index val x)
-subst index val (Lam name index' body) = Lam name (index' - 1) $ subst index (embedExpr index val) body
+-- | Scope-expansion, the left inverse of 'scopeReduce'.
+scopeExpand :: Expr n -> Maybe (Expr n)
+scopeExpand (Drop (App fe xe))       = Just $ App (Drop fe) (Drop xe)
+scopeExpand (Drop (Lam (Drop body))) = Just $ Lam $ Drop $ Drop body
+scopeExpand _                        = Nothing
+
+-- | An expression is in "normal form" if it contains no redexes (see 'isRedex').
+isNormal :: Expr n -> Bool
+isNormal expr = not (isRedex expr) && case expr of
+  -- In addition to this expression not being a redex,
+  -- we must check that none of its subexpressions are redexes either.
+  App fe xe -> isNormal fe && isNormal xe
+  Lam e     -> isNormal e
+  Drop e    -> isNormal e
+  _         -> True
+
+-- TODO: Finish the below documentation on reduction strategies. I got bored.
+-- | Now that we have defined the ways in which an expression may be reduced ('reduce'),
+-- we need to define a "reduction strategy" to describe in what order we will apply reductions.
+-- Different reduction strategies have different performance characteristics,
+-- and even produce different results.
+--
+-- One of the most important distinctions between strategies is whether they are "normalizing".
+-- A normalising strategy will terminate if and only if
+-- the expression it is normalizing has a normal form.
+--
+-- I have chosen to use a normalizing reduction strategy.
+eval :: Expr n -> Expr n
+eval expr = case reduce innerReduced of
+  Just e -> eval e
+  -- The expression didn't make any progress,
+  -- so we know that no further reductions can be applied here.
+  -- It must be blocked on something.
+  -- TODO: return why we stopped evaluating,
+  -- so we can avoid redundantly re-evaluating stuff if nothing changed.
+  Nothing -> innerReduced
+  where innerReduced = case expr of
+          -- TODO: Factor out this recursive case (from 'isNormal' too).
+          App fe xe -> App (eval fe) (eval xe)
+          Lam e -> Lam (eval e)
+          Drop e -> Drop (eval e)
+          x -> x
--- a/src/UntypedLambdaCalculus/Parser.hs
+++ b/src/UntypedLambdaCalculus/Parser.hs
@ -1,21 +1,19 @@
-{-# LANGUAGE TemplateHaskell, TypeFamilies, DeriveFunctor, DeriveFoldable, DeriveTraversable, DataKinds #-}
 module UntypedLambdaCalculus.Parser (parseExpr) where

 import Control.Applicative (liftA2)
-import Control.Monad.Reader (local, ask)
-import Data.List (foldl', elemIndex)
-import Data.Functor.Foldable.TH (makeBaseFunctor)
+import Control.Monad.Reader (Reader, runReader, withReader, asks)
+import Data.Type.Equality ((:~:)(Refl))
+import Data.Type.Nat
+import Data.Vec
 import Text.Parsec (SourceName, ParseError, (<|>), many, sepBy, letter, alphaNum, char, between, spaces, parse, string)
 import Text.Parsec.String (Parser)
-import UntypedLambdaCalculus (Expr (Free, Var, Lam, App), ReaderAlg, cataReader)
+import UntypedLambdaCalculus (Expr (Free, Var, Lam, App, Drop))

 data Ast = AstVar String
         | AstLam [String] Ast
         | AstApp [Ast]
         | AstLet String Ast Ast

-makeBaseFunctor ''Ast
-
 -- | A variable name.
 name :: Parser String
 name = liftA2 (:) letter $ many alphaNum
@ -60,28 +58,38 @@ let_ = do
 consumesInput :: Parser Ast
 consumesInput = let_ <|> var <|> lam <|> parens app

-toExpr :: Ast -> Expr
-toExpr = cataReader alg (0, [])
-  where
-    alg :: ReaderAlg AstF (Int, [String]) Expr
-    alg (AstVarF varName) = do
-      (_, bound) <- ask
-      return $ case varName `elemIndex` bound of
-        Just index -> Var $ length bound - index - 1
-        Nothing    -> Free varName
-    alg (AstLamF vars body) = 
-      foldr (\v e -> do
-                (index, bound) <- ask
-                Lam v index <$> local (\_ -> (index + 1, v : bound)) e) body vars
-    alg (AstAppF es) = do
-      (index, _) <- ask
-      foldl' App (Lam "x" index (Var index)) <$> sequenceA es
-    alg (AstLetF var val body) = do
-      (index, bound) <- ask
-      body' <- local (\_ -> (index + 1, var : bound)) body
-      App (Lam var index body') <$> val
+toExpr :: Ast -> Expr 'Z
+toExpr ast = runReader (toExpr' ast) VNil
+  -- TODO: This code is absolutely atrocious.
+  -- It is in dire need of cleanup.
+  where toExpr' :: SNatI n => Ast -> Reader (Vec n String) (Expr n)
+        toExpr' (AstVar name) = asks $ makeVar snat SZ
+          where makeVar :: SNat n -> SNat m -> Vec n String -> Expr (Plus m n)
+                makeVar SZ     m VNil            = dropEm m $ Free name
+                makeVar (SS n) m (var ::: bound) = case plusSuc m n of
+                  Refl
+                    | name == var -> dropEm2 n m
+                    | otherwise   -> makeVar n (SS m) bound
+        toExpr' (AstApp es) = asks $ thingy id es
+        toExpr' (AstLam [] body) = toExpr' body
+        toExpr' (AstLam (name:names) body) =
+          fmap Lam $ withReader (name :::) $ toExpr' $ AstLam names body
+        toExpr' (AstLet var val body) =
+          App <$> toExpr' (AstLam [var] body) <*> toExpr' val

+        thingy :: SNatI n => (Expr n -> Expr n) -> [Ast] -> Vec n String -> Expr n
+        thingy f []     _     = f $ Lam Var
+        thingy f (e:es) bound = thingy (flip App (runReader (toExpr' e) bound) . f) es bound
+
+        dropEm :: SNat m -> Expr n -> Expr (Plus m n)
+        dropEm SZ     e = e
+        dropEm (SS n) e = Drop $ dropEm n e
+
+        dropEm2 :: SNat n -> SNat m -> Expr ('S (Plus m n))
+        dropEm2 _ SZ = Var
+        dropEm2 n (SS m) = Drop $ dropEm2 n m
+          
 -- | Since applications do not require parentheses and can contain only a single item,
 -- | the `app` parser is sufficient to parse any expression at all.
-parseExpr :: SourceName -> String -> Either ParseError Expr
+parseExpr :: SourceName -> String -> Either ParseError (Expr 'Z)
 parseExpr sourceName code = toExpr <$> parse app sourceName code