diff --git a/package.yaml b/package.yaml index c59e485..a72a9b6 100644 --- a/package.yaml +++ b/package.yaml @@ -41,6 +41,7 @@ default-extensions: - PatternSynonyms - ScopedTypeVariables - StandaloneDeriving +- TupleSections - TypeFamilies - ViewPatterns diff --git a/src/Ivo/Syntax/Base.hs b/src/Ivo/Syntax/Base.hs index b5f0198..ccb3e97 100644 --- a/src/Ivo/Syntax/Base.hs +++ b/src/Ivo/Syntax/Base.hs @@ -1,152 +1,936 @@ +-- | The language's abstract syntax. +-- +-- The abstract syntax is very forward-thinking +-- in that it's designed to accomodate for features +-- which haven't been implemented yet, +-- partly to make sure that I won't need to change it in a way +-- that will involve rewriting lots of other code when I do implement those features. +-- Plus, this serves as documentation for what the syntax /will/ +-- be and why I designed it to be the way it is. +-- +-- That means that not all of this syntax is necessarily supported by the parser, +-- and that even if it /is/, that the associated feature isn't necessarily +-- implemented by the typechecker, interpreter, or compiler. module Ivo.Syntax.Base - ( Expr (..), ExprF (..), Ctr (..), Pat, Def, DefF (..), PatF (..), VoidF, Text, NonEmpty (..) - , Type (..), TypeF (..), Scheme (..), tapp - , substitute, substitute1, rename, rename1, free, bound, used - , Parse, AST, ASTF, ASTX, ASTXF (..), NonEmptyDefFs (..) - , pattern LetFP, pattern LetRecP, pattern LetRecFP - , pattern PNat, pattern PNatF, pattern PList, pattern PListF, pattern PChar, pattern PCharF - , pattern PStr, pattern PStrF, pattern HoleP, pattern HoleFP - , simplify + ( Text, NonEmpty (..) + , Scope (..), TopLevel (..), Definition (..), PublicOrPrivate (..) + , Kind, Type, Expr (..), Sig (..), Sig' (..), Lit (..) + , Pattern (..), Guard, CaseBranch (..) + , FlexibleSeparatorSyntax ) where -import Ivo.Expression.Base +import Data.List.NonEmpty (NonEmpty (..)) +import Data.Text (Text) -import Data.Functor.Foldable (embed, project) -import Data.List.NonEmpty (NonEmpty (..), toList) -import Data.Text qualified as T +newtype Scope = Scope [TopLevel] + deriving Show -data Parse --- | The abstract syntax tree reflects the structure of the externally-visible syntax. +data TopLevel + -- | Bring the definitions in a module into scope. + -- + -- > open Module1 + -- + -- An explicit import list is also permitted: + -- + -- > open Module2 ( foo ; bar ; baz ) + -- + -- All of the imports can be re-exported using @pub open@, + -- or individual imports may be made public + -- by individually prefixing each with @pub@. + -- These syntaxes may not be combined. + -- + -- > pub open Module1 ; + -- > open Module2 ( pub foo ; bar ; pub baz ) ; + -- + -- Fields can additionally be renamed or hidden: + -- + -- > open Module3 using ( foo ; bar ) renaming ( foo → baz ) ; + -- > open Module4 hiding ( foo ) ; + -- > open Module5 hiding ( foo ) renaming ( bar → baz ) ; + -- + -- Combining an explicit import list with a hiding list is forbidden, + -- seeing as doing so would be completely pointless. + -- + -- It is also possible to open signatures, but only into other signatures. + -- When opening into a signature, all declarations are added to + -- the enclosing signature's declarations /regardless/ of + -- whether the open was marked public, + -- but default definitions are imported only as part of a @pub open@. + -- + -- Note that this feature may be used as part of this idiom for extending a module: + -- + -- > Λ m → sig Monoid { + -- > pub open Semigroup m ; + -- > ∅ : m ; + -- > // ... identity laws --- + -- > } + -- + -- Data constructor patterns are exported + -- separately from the constructors themselves. + -- The patterns are exported under the type constructor's name; + -- the constructors are exported like functions. + -- Exporting patterns implicitly exports the constructors as well, + -- but the constructors can subsequently be hidden. + -- Here's what I mean: + -- + -- > // export both constructors and patterns: + -- > open data using ( MyType ( Con1 ; Con2 ) ) ; + -- > // export just constructors, not patterns: + -- > open data using ( MyType ; Con1 ; Con2 ) ; + -- > // export just patterns, not constructors: + -- > open data using ( MyType ( Con1 ; Con2 ) ) hiding ( Con1 ; Con2 ) ; + -- > // export the patterns and data constructors, but not the type constructor: + -- > open data using ( MyType ( Con1 ; Con2 ) ) hiding ( MyType ) ; + -- + -- Combined with default definitions in the signature + -- (to whom both the constructors and patterns are /always/ in scope) + -- this can be used to implement opaque data types + -- with smart constructors and/or eliminators, + -- because the type could only be constructed via exported default functions, + -- but pattern matched against as usual (or vice versa, or both). + -- + -- An "import all" syntax is also supported: + -- + -- // import all constructors + -- > open data using ( MyType ( .. ) ) ; + -- // hide all constructors + -- > open data hiding ( MyType ( .. ) ) ; + -- + -- This uses 'FlexibleSeparatorSyntax'. + = Open PublicOrPrivate Expr [(Text, Maybe (Maybe [Text]))] -- * @using@ + [(Text, Maybe (Maybe [Text]))] -- * @hiding@ + [(Text, Text)] -- * @renaming@ + -- | See 'Definition'. + -- + -- A definition may be exported from the enclosing module or signature + -- by prefixing it with the keyword @pub@: + -- + -- > pub foo : bar = baz + | Define PublicOrPrivate Definition + -- | At the top level, @data @ is syntactic sugar for @open data @. + -- + -- See 'Open' and 'Data' for more information.. + | TLData Expr + -- | At the top level, @axiom @ is syntactic sugar for @open axiom @. + -- + -- See 'Open' and 'Axiom' for more information. + | TLAxiom Expr + -- | At the top level, @adt <...>@, @gadt <...>@, and @sig <...>@ + -- are syntactic sugar for @open data adt <...>@, @open data gadt <...>@, + -- and @open data sig <...>@ respectively. + -- + -- See 'Open', 'Data', and 'Sig' for more information. + | TLSig Sig + deriving Show + +-- | 'Public' is written with @pub@; +-- 'Private' indicates the abscence of an access modifier. +data PublicOrPrivate = Public | Private + deriving Show + +-- | Define a name as an expression, with an optional type annotation. -- --- It includes syntactic sugar, which allows multiple ways to express many constructions, --- e.g. multiple definitions in a single let expression or multiple names bound by one abstraction. -type AST = Expr Parse --- There is no technical reason that the AST can't allow nullary applications and so forth, --- nor is there any technical reason that the parser couldn't parse them, --- but the parser *does* reject them to avoid confusing edge cases like `let x=in`, --- so they're forbidden here too so that the syntax tree can't contain data +-- > foo : type = ; // with type annotation +-- > bar = ; // with inferred type +data Definition = Definition Text (Maybe Type) Expr + deriving Show + +-- | An expression which represents the type of a type. +type Kind = Type + deriving Show +-- | An expression which represents the type of a value. +type Type = Expr + deriving Show + +data Expr + --- + --- Expressions pertaining to signatures and modules + --- + + -- | Automatically implement a signature by creating (co)inductive data types + -- with the given constructors/eliminators, and additionally pattern matching. + -- + -- > data + -- + -- See 'ADT', 'GADT', and 'Sig' for examples of the data types this can implement. + -- + -- @data@ can be used to implement things with kind @Type → ... → Sig@ + -- by turning all type arguments into signature parameters, + -- but generally speaking, the use of signature parameters should be preferred + -- see 'Sig' (the type, not the constructor). + = Data Expr + -- | Automatically "implement" a signature by taking all of its definitions as axioms. + -- + -- > axiom + -- + -- Axiom "implementations" do not have any computational meaning + -- and can only be typechecked, but not executed. + -- It may be possible to execute a program which contains axioms, + -- but the program will crash when the axioms are encountered. + -- + -- Axioms are useful when writing proofs which you are not interested in executing, + -- or as placeholders for functions you haven't implemented yet. + | Axiom Expr + -- | A signature literal. See 'ADT', 'GADT', and 'Sig' for examples. + | SigE Sig + -- | A module expression with an optional signature annotation. + -- Like with signatures, modules may have type parameters. + -- + -- If a module has a signature annotation, + -- then only the names in the signature will be exported + -- (and /all/ of those names will, even if they were not marked public + -- at their definition site). + -- All names exposed by the signature will be typechecked + -- against their types in the signature. + -- + -- If no signature is specified, then the module will have an anonymous signature + -- based on the inferred (or specified) types for all of its public definitions. + -- + -- > Λ a → mod : Monoid (List a) { + -- > ∅ = [] ; + -- > _<>_ = /* don't feel like writing this code right now */ ; + -- > + -- > monoid_id_left = ... ; + -- > ... + -- > } + -- + -- Like with most scopes, it is possible to open a module within another module; + -- public imports will be re-exported if specified by the module's signature. + -- + -- A public definition which is not included in the module's signature + -- will issue a warning, but not fail to compile. + -- Because definitions are exported if and only if they are part of the signature, + -- @pub@ is always redundant in a module with a type signature. + -- + -- This uses 'FlexibleSeparatorSyntax'. + | Mod [Either (NonEmpty Text, Kind) Text] (Maybe Type) [TopLevel] + -- | A field accessor for signatures and modules. + -- + -- A signature @sig : Sig@ which defines @foo : bar@ + -- and a module implementing that signature @mod : sig@, then: + -- + -- > sig::foo : sig → bar + -- > mod::foo : bar + | Access Expr Text + + --- + --- Expressions pertaining to types + --- + + -- | Universal type quantification with optional kind annotations. + -- + -- With unicode syntax: + -- + -- > ∀ foo ( bar : baz ) quux → type + -- + -- With ASCII syntax: + -- + -- > forall foo ( bar : baz ) quux -> type + -- + -- Forall is constructed by 'TypeLam' and eliminated by 'TypeApp'; + -- however, occurrences of 'TypeLam' and 'TypeApp' + -- will usually be inferred and do not need to be written. + -- + -- 'Forall' is used for abstracting over /computationally-irrelevant/ terms; + -- the terms are usually dependent and invisible, but /may/ not be. + -- + -- 'Arrow' is used for abstracting over /computationally-relevant/ terms; + -- the terms are always visible and usually independent, but /may/ be. + -- + -- The distinction between these quantifiers was first made known to me + -- by Sam Lindley and Conor McBride's Hasochism paper. + | Forall (NonEmpty (Either (NonEmpty Text, Kind) Text)) Type + -- | Function types with optional argument names; + -- multiple arguments may be bound with one type. + -- + -- With unicode syntax: + -- + -- > ( foo bar : baz ) → quux → fwee + -- + -- With ASCII syntax: + -- + -- > ( foo bar : baz ) -> quux -> fwee + -- + -- Arguments @foo@ and @bar@ both have type @baz@; + -- the argument with type @quux@ does not have a name; + -- @fwee@ is the function's return type. + -- + -- Note that an empty list of argument names + -- corresponds with @foo → bar@, /not/ @(: foo) → bar@, + -- which is syntactically invalid. + -- + -- 'Arrow' is constructed by 'Lam' and eliminated by 'App'. + -- + -- 'Arrow', as a relevant quantifier, contrasts 'Forall', an irrelevant quantifier; + -- more information in the documentation for 'Forall'. + | Arrow [Text] Type Type + -- | Type annotations (or kind annotations; they are the same thing). + -- + -- > foo : bar + | Ann Expr Type + -- | A hole represents a type or value to be inferred. + -- If inference fails, then the inferred type or kind + -- and detailed information about the surrounding context will be printed as an error, + -- to be used as a tool for type-driven development. + -- + -- > _ + | Hole + + --- + --- Expressions pertaining to values + --- + + -- | The constructor for 'Forall', + -- which explicitly binds a forall-quantified type variable + -- as a value-level variable with an optional kind annotation. + -- + -- With unicode syntax: + -- + -- > Λ a → foo : ∀ a → bar + -- > Λ ( a : k ) → foo : ∀ ( a : k ) → bar + -- + -- With ASCII syntax: + -- + -- > ^ a -> foo : forall a -> bar + -- > ^ ( a : k ) -> foo : forall ( a : k ) -> bar + -- + -- The name of the variable and the type and in the value + -- do not have to be the same, and in these specific examples, + -- there is no need whatsoever to annotate the kind in both + -- the type and value; if one is specified, the other will + -- /always/ be able to be inferred. + -- + -- These variables still are type variables and do not contain data; + -- they will be completely erased at runtime so the computational behavior + -- of a function cannot depend on them. + -- + -- Type lambdas are usually inferred and do not need to be written down. + -- The purpose of type lambdas is to bring type variables into scope + -- for use by type annotations and type applications. + -- + -- In the presence of inferred type lambdas, + -- the explicit type lambda will always bind the /innermost/ forall. + -- There are a few reasons for this: + -- + -- 1. Type quantifiers can only depend on types to their left, + -- meaning that the innermost quantifiers tend to be the most complex + -- and most difficult to infer. + -- + -- 2. If the kind of a quantified type is inferred to be polymorphic, + -- a new kind variable will be introduced and quantified + -- on the /left/ for the reasons described in (1). + -- These implicitly-quantified variables /cannot/ + -- be bound by type lambdas or applied using type applications, + -- but even if they could be, binding quantifiers inner-to-outer + -- would prevent this transformation from affecting + -- the meaning of the expression. + -- + -- 3. Module parameters may be given specific values + -- /or/ be made polymorphic, depending on how the module is used. + -- These inserted foralls are inserted (obviously) on the /left/. + | TypeLam (NonEmpty (Either (NonEmpty Text, Kind) Text)) Expr + -- | Type application, the eliminator for 'Forall'. + -- + -- Suppose you have an expression @foo : forall a → bar a@, + -- but the type @a@ is ambiguous and cannot be inferred. + -- You may explicitly instantiate @foo@ with @Nat@: + -- + -- > foo @ Nat : bar Nat + -- + -- Type application can usually be inferred without needing to be specified. + | TypeApp Expr Type + -- | An lambda abstraction, the constructor for 'Arrow'. + -- + -- With unicode syntax, if @baz : quux@: + -- + -- > λ foo → baz : bar → quux + -- > λ ( foo : bar ) → baz : bar → quux + -- + -- With ASCII syntax: + -- + -- > λ foo -> baz + -- > λ ( foo : bar ) -> baz + -- + -- A lambda expression may pattern match against its arguments: + -- + -- > λ _ → foo // the argument is irrelevant + -- > λ x y → foo // multiple arguments + -- > λ ( x , y ) → foo // pattern match against a *-pair (data) + -- > λ ( π₂ → y ) → foo // project the first argument of an &-pair (codata) + -- + -- As a special case, a lambda which matches an impossible pattern + -- may have no body: + -- + -- > λ ! // that's really the whole thing! + -- + -- Even more generally, it may branch based on the pattern match, + -- after binding any number of arguments which it doesn't branch on, + -- which will be shared across every case branch. + -- + -- > λ (x , y) { + -- > S n | Left m → f n m ; + -- > Z | Right z → x z ; + -- > _ | _ → y ; + -- > } + -- + -- Pattern matching is a very complex syntactic construct + -- which can do even more than just this; + -- see 'CaseBranch' for the full detail of what it can do. + -- + -- Although the 'Lam' constructor allows for either + -- the list of patterns (for non-branching arguments) + -- or the list of case branches (for branching arguments) + -- to be empty, /at least one/ must consume an argument; + -- both may not be empty. + | Lam [Pattern] [CaseBranch] + -- | A lambda with only one, trivial branch, e.g. @λ x y z -> e@. + -- + -- See 'Lam'. + | LamBlock [Pattern] Expr + -- | Function application, the eliminator for 'Arrow'. + -- (See also 'AppI', mixfix function application.) + -- + -- Syntactically, function application is represented by juxtiposition. + -- + -- If @foo : baz -> quux@ and @bar : baz@: + -- + -- > foo bar : quux + | App Expr (NonEmpty Expr) + -- | Mixfix function application (see also 'App'). + -- + -- A mixfix operator name is composed of text + -- plus underscores where the arguments go. + -- Here, we replace the underscores with a space + -- corresponding with each argument in the list, + -- and, in the case of a partially applied mixfix operator, + -- leave the underscores for each argument /not/ applied. + -- + -- Here's a fully applied example for @if_then_else_@: + -- + -- > if foo then bar else baz + -- + -- This is a single infix application: @AppI "if then else " [foo, bar, baz]@. + -- + -- On the other hand, if we only provide the @then@ case: + -- + -- > if_then bar else_ + -- + -- This is represented as @AppI "if_then else_" [bar]@. + -- + -- If a mixfix operator is left completely unapplied, @if_then_else_@, + -- then it should be parsed as a regular variable name + -- and applied using regular function application. + | AppI Text (NonEmpty Expr) + -- | A let expression, used to define variables in a local scope. + -- + -- The definitions in a let expression can include both variable definitions + -- and /opening modules/ (and consequently, even local data types). + -- + -- Defining two variables: + -- + -- > let foo = bar ; baz : quuz = splee in fwap + -- + -- Opening a module and defining a variable: + -- + -- > let open Module ; foo = bar in baz + -- + -- The definitions in a let expression may be recursive, interdependent, + -- or even mutually recursive, just like top-level declarations. + -- (However, the use of these features may interfere with type inference.) + -- Variables bound in a let expression may shadow variables, + -- including those defined by /other/ let expressions, + -- but a variable can only be defined at most once in a single let expression. + -- + -- This uses 'FlexibleSeparatorSyntax'. + | Let [TopLevel] Expr + -- | A case expression, used to pattern match against some variables. + -- + -- Multiple expressions to be matched against are separated by bars (@|@) + -- with 'FlexibleSeparatorSyntax'. + -- + -- > case foo | bar { + -- > π₁ ( Left x | _ ) → x ; + -- > π₁ ( Right _ | z ) → z ; + -- > π₂ ( Right y | _ ) → y ; + -- > π₂ ( Left _ | z ) → z ; + -- > } + -- + -- A case expression with no arguments may be used just for guards: + -- + -- > case { + -- > ? cond1 → x ; + -- > ? cond2 → y ; + -- > ? else → z ; + -- > } + -- + -- 'Lam' may also perform case branching; lambda case syntax + -- should generally be preferred over binding a variable with lambda + -- and then immediately 'Case'-ing it. + -- + -- For more information about the (very complex) syntax of case branches, + -- see 'CaseBranch'. + | Case [Expr] [CaseBranch] + -- | A case with only one, trivial branch, e.g. + -- @case x | y of a , b | c , d → e@. + -- + -- See 'Case'. + | CaseBlock [Expr] [Pattern] Expr + -- | Integer, string, character, and list literals. + | Lit (Lit Expr) + -- | Variables. + | Var Text + deriving Show + +-- | A signature literal. -- --- that the parser would refuse to accept. --- As a matter of curiosity, here's why `let x=in` was syntactically valid: --- 1. Parentheses in `let` statements are optional, infer them: `let x=()in()`. --- 2. Insert optional whitespace: `let x = () in ()`. --- 3. Nullary application expands to the identity function because --- the identity function is the left identity of function application: --- `let x=(\x.x) in \x.x`. -type instance AppArgs Parse = NonEmpty AST -type instance AbsArgs Parse = NonEmpty Text -type instance LetArgs Parse = NonEmpty (Def Parse) -type instance CtrArgs Parse = [AST] -type instance AnnX Parse = () -type instance XExpr Parse = ASTX +-- Signatures are the type of modules. +-- They're basically record types, +-- associating a bunch of names with a bunch of types. +-- +-- Signatures can be used for many things, including defining data types. +-- See 'Data', 'Axiom', and 'Mod' for a few ways they can be used. +-- +-- However, there are multiple syntaxes for writing them, +-- which is (obviously) what this type enumerates. +-- +-- Do not confuse type functions which return a signature (@Λ a → @), +-- signature parameters (@sig a { ... }@), +-- and type indices (@sig { Foo : a → Type }@)! +-- These three features serve entirely different purposes. +-- +-- Type functions which return a signature /are not signatures per se/ +-- and /must/ be instantiated with a type argument to become a signature. +-- The created signature is /not/ polymorphic over the bound type. +-- +-- For example, this signature describes for any type @m@ that @m@ is a semigroup: +-- +-- > Λ m → sig { +-- > _<>_ : m → m → m ; +-- > <>IsAssoc : ( x y z : m ) → ( x <> y ) <> z ≡ x <> ( y <> z ) ; +-- > +-- +-- Signature parameters are universally quantified +-- across all definitions in the signature, +-- so that the signature may reference its own definitions with any type parameter, +-- and critically, the /implementation/ must be polymorphic over the +-- signature parameter as well. +-- +-- The way quantifiers work is that they're added as /explicit/ parameters to kinds +-- and as /implicit/ parameters to types for all declarations and definitions +-- in the signature. +-- +-- For example, this signature describes a list data type +-- which works for any type @a@, plus a @join@ operation: +-- +-- > sig a { +-- > List : Type ; +-- > [] : List a ; +-- > _∷_ : List a → List a → List a ; +-- > join : List (List a) → List a ; +-- > } +-- +-- Note that the @join@ operation would be illegal for @Λ a → sig { ... }@ +-- because the signature's implementation doesn't have to be polymorphic in @a@, +-- but @join@ instantiates @a@ with @List a@. +-- +-- It is the difference between describing a polymorphic type +-- and a family of monomorphic types which isn't necessarily universally implemented. +-- +-- Finally, type indices are just the arguments to a type constructor +-- which are not quantified by @sig@ (but rather by each individual definition) +-- and thus are not necessarily parametric, e.g.: +-- +-- > sig { +-- > Fin : Nat → Type ; +-- > +-- > FZ : Fin Z ; +-- > FS : ∀ n → Fin n → Fin ( S n ) ; +-- > } +data Sig + -- | Every kind of signature literal allows + -- for type parameters with optional kind signatures, + -- so those have been factored out into this constructor. + = SigQs [Either (NonEmpty Text, Kind) Text] Sig' + deriving Show +-- | A signature literal variant, excluding type parameters. +-- +-- ('SigQs' is the constructor with the type parameters.) +data Sig' + -- | An algebraic data type (ADT) signature. @False@ for data, @True@ for codata. + -- + -- > adt Nat { Z + S Nat } + -- > adt Stream a { Head a & Tail ( Stream a ) } + -- + -- Preceding @+@s and @&@s are also permitted, which looks like this: + -- + -- The syntax @+@ and @&@ were chosen because the branches of ADTs correspond with + -- additive disjunction (written @⊕@ or @+@) and additive conjunction (written @&@) + -- from linear logic. + -- + -- (The arguments correspond with multiplicative conjunction (@⊗@ or @*@), + -- and multiplicative disjunction (@⅋@ or @|@) respectively; + -- however, these arguments are written using mere juxtaposition + -- instead of using an explicit syntax because it is unambiguous.) + -- + -- This uses 'FlexibleSeparatorSyntax', e.g.: + -- + -- > adt List a { + -- > + [] + -- > + _∷_ a ( List a ) + -- > } + = ADT Bool Text (Maybe Kind) [(Text, [Type])] + -- | A generalized algebraic data type (GADT) signature. + -- + -- GADTs are similar to ADTs. However, thanks to their extended syntax, + -- they also may be non-parametric in their type arguments + -- (non-parametric arguments, i.e. indices, are specified in the kind signature), + -- or be used to define higher-inductive types. + -- + -- `Vec a n` has a type parameter `a` and a type index `n`: + -- + -- > gadt Vec a : Nat → Type { + -- > [] : Vec a Z ; + -- > _∷_ : ∀n. a → Vec a n → Vec a ( S n ) ; + -- + -- `Multiset a` is a higher-inductive type with a commutative `union` constructor. + -- + -- > gadt Multiset a { + -- > empty : Set a ; + -- > singleton : a → Set a ; + -- > _union_ : Set a → Set a → Set a ; + -- > + -- > // An additional constructor for Multiset's *identity* type. + -- > union_commutative : ( xs ys : Set a ) → xs union ys ≡ ys union xs; + -- > } + -- + -- This uses 'FlexibleSeparatorSyntax'. + | GADT Text (Maybe Kind) [(NonEmpty Text, Type)] + -- | The most general syntax for signatures. + -- + -- General signatures are simply collections of names and their types. + -- They are not necessarily the template for a /data/ type; + -- they may also be used for defining typeclasses, + -- an abstract interface for multiple implementations of a data structure, + -- a set of definitions to be taken as axiomatic, etc. + -- + -- However, general signatures still /can/ be used to define data types + -- which are potentially more general than even GADTs, + -- for example mutually recursive types or inductive-inductive types. + -- + -- A signature which describes the Monoid typeclass: + -- + -- > sig m { + -- > empty : m ; + -- > _<>_ : m -> m -> m ; + -- > + -- > monoid_id_left : ( y : m ) → empty <> y ≡ y ; + -- > monoid_id_right : ( x : m ) → x <> empty ≡ x ; + -- > monoid_assoc : ( x y z : m ) → x <> (y <> z) ≡ (x <> y) <> z ; + -- > } + -- + -- A signature which describes an axiom, the law of the excluded middle: + -- + -- > sig a { lem : ¬ a + a } + -- + -- The simultaneous definition of two mutually-recursive data types: + -- + -- > sig { + -- > Even , Odd : Nat → Type ; + -- > + -- > zeroIsEven : Even Z ; + -- > succOddIsEven : ∀n. Odd n → Even ( S n ) ; + -- > succEvenIsOdd : ∀n. Even n → Odd ( S n ) ; + -- > } + -- + -- (Multiple comma-separated declarations use 'FlexibleSeparatorSyntax'.) + -- + -- An inductive-recursive pair of types: + -- + -- > // An inductive-recursive pair of types: + -- > open data sig ( a : Type ) ( _#_ : a → a → Type ) { + -- > // A list in which all elements are distinct from each other + -- > // by the distinctness relation `_#_`. + -- > Dlist : Type ; + -- > // A proof that the first argument is distinct + -- > // from all of the elements of the second argument. + -- > Fresh : a → Dlist a _#_ → Type ; + -- > + -- > nil : Dlist a _#_ ; + -- > cons : ( x : a ) → ( xs : Dlist a _#_ ) → Fresh a _#_ x xs → Dlist a _#_ ; + -- > + -- > freshNil : ( x : a ) → Fresh a _#_ x nil ; + -- > freshCons : ( x : a ) + -- > // the original list + -- > → ( head : a ) → ( tail : Dlist a _#_ ) → ( p : Fresh a _#_ head tail ) + -- > // `x` is distinct from both the head and tail of the original list + -- > → x # head -> Fresh a _#_ x tail + -- > // thus `x` is distinct from the /entire/ original list. + -- > → Fresh a _#_ x ( cons head tail p ) ; + -- > } + -- + -- Adding yet more complexity, signatures may also contain a default implementation, + -- which is a definition, rather than a type declaration. + -- Even further, the type of the default implementation may be a supertype of + -- the declared type of the field, so that the default implementation + -- may only be used if the definitions of other fields + -- allow the default implementation's type to unify with the declared type. + -- + -- If the default implementation's type is identical to the declared type, + -- then it is sufficient to only include the default implementation's definition; + -- that the field is part of the signature is implied by its definition. + -- + -- For typechecking purposes, the equality of signatures + -- is based only on the declared types of fields + -- (including fields implicitly declared by a definition) + -- and the definitions themselves are completely ignored; + -- for the purposes of implementing a module, + -- the default definitions will be exported as the implementations + -- of their respective declared fields in the signature unless overridden + -- by the module implementation. + -- + -- Examples of default implementations: + -- + -- > sig { + -- > Fwip : Type ; + -- > foo : a ; // no default implementation + -- > bar : b = x ; // `bar` defaults to `x` + -- > baz : Thing ; + -- > // `baz` defaults to `cool` only if an instance of `Cool Thing` is in scope + -- > baz : Cool Thing ⇒ Thing = cool ; + -- > } + -- + -- I don't feel like including examples of `open` in signatures + -- because I'm sick of writing docs, so I hope you get the idea + -- based on my wall of text. + -- + -- Finally, this is how visibility works: + -- + -- * All declarations are public; @pub@ modifiers are forbidden + -- because they would be strictly redundant. + -- + -- * All definitions associated with a declaration are public; + -- @pub@ modifiers are allowed but redundant. + -- + -- * Definitions without a declaration are public only if marked @pub@; + -- otherwise, there will be no "inferred declaration". + | Sig [Either (NonEmpty Text, Type) TopLevel] + deriving Show -type ASTX = ASTXF AST +-- | A pattern matching against a data type's constructors +-- or a codata type's eliminators. +-- +-- A nested pattern is composed of some depth of codata eliminators +-- containing some depth of data constructors; +-- matching codata eliminators is used to construct codata types, +-- whereas matching data constructors is used to eliminate data types. +-- +-- Patterns are used primarily in the context of 'CaseBranch'es. +data Pattern + -- | Bind the matched value to a variable, with an optional type annotation. + -- + -- > foo : bar + -- > foo + -- + -- A variable name may be bound multiple times in the same pattern + -- if and only if the type checker can prove that both + -- uses have identicial values. + = PatVar Text (Maybe Type) + -- | An irrelevant pattern completely ignores the matched value. + -- + -- > _ + | Irrelevant + -- | An impossible pattern matches a value of a type which + -- the typechecker can prove is not possibly inhabited. + -- + -- > ! + | Impossible + -- | Match against a data constructor or codata eliminator. + -- (See also 'PatAppI'.) + -- + -- > Left x + -- > π₁ xy + -- > Pair x y + | PatApp Text [Pattern] + -- | Match against a mixfix data constructor or codata eliminator. + -- (See also 'PatApp'.) + -- + -- The mixfix constructor must be completely applied to patterns. + -- + -- > x ∷ xs // Pattern match against a list constructor. + | PatAppI Text (NonEmpty Pattern) + -- | Apply a function to the value and then pattern match against it. + -- This may be used with e.g. copattern eliminators. + -- + -- If the matched value has type @a@, then @( f : a → b ) → ( x : b )@. + -- + -- > π₁ → foo // Pattern match against the left value of an &-pair. + -- + -- This syntax is based on Haskell's view patterns. + | View Expr Pattern + | PatLit (Lit Pattern) + deriving Show -type ASTF = ExprF Parse -type instance AppArgsF Parse = NonEmpty -type instance LetArgsF Parse = NonEmptyDefFs -type instance CtrArgsF Parse = [] -type instance XExprF Parse = ASTXF +-- | A case branch has a left-hand side which +-- consists of some number of patterns to be matched and guards, +-- and a right-hand side consisting of an expression. +-- In an expression with multiple cases branches, +-- each left-hand side has the same type, +-- and each right-hand side has the same type (plus copattern matches). +-- +-- To give a bit of an example, in: +-- +-- > { +-- > pat1 | pat2 → foo ; +-- > pat3 | pat4 → bar ; +-- > } +-- +-- `pat1` and `pat3` both match a value of the same type, +-- `pat2` and `pat4` both match a value of the same different type, +-- and `foo` and `bar` both have yet another different matching type. +-- +-- In the case of copattern matches, e.g. +-- +-- > { +-- > π₁ → foo ; +-- > π₂ → bar ; +-- > } +-- +-- then @foo : a@ and @bar : b@ have different types, +-- but the return type of the expression as a whole remains the same: @a & b@. +-- +-- In every context, the case branch syntax uses 'FlexibleSeparatorSyntax'. +-- +-- Additionally, if there is only one case branch, +-- all of the parentheses and semicolons can usually be elided +-- in a context where block syntax is permitted: +-- +-- > +-- +-- The behavior of case branches is yet more complex: see 'Refinement' and 'Guards' +-- for the additional things that case branches can do. +data CaseBranch + -- | A branch may introduce additional values to match against using __plus clauses__. + -- This allows /refining/ previous branches, for example by introducing a proof + -- so that two variables may be /unified/ by matching against the identity type, + -- or by proving that a type is uninhabited so that an impossible pattern may be used. + -- + -- For example, when matching against two values @x : Nat@, @y : Nat@ + -- such that @x ~ z × 2@ and @y ~ z + z@, + -- and given a function @p : (z : Nat) → z × 2 ≡ z + z@: + -- + -- > { + -- > // additionally pattern match against the value `p z` + -- > x y + p z + -- > // now that a proof of equality is in-scope, we can refine our match + -- > x x | Refl → ... + -- > } + -- + -- The additional patterns may be matched without modifying the original pattern; + -- in this case, the patterns list will only + -- include the patterns for newly-introduced matches, + -- and the rest of the pattern match can be assumed to be the same. + -- + -- > { + -- > some complex patterns | foo ; + -- > // `...` is identical to writing `some complex patterns`; + -- > // all of the variables that it bound are still in scope. + -- > ... | pat1 → asdasd + -- > ... | pat2 → iouqwe + -- > } + -- + -- This syntax is based on Agda's with clauses. + = Refinement [Pattern] [Expr] (NonEmpty CaseBranch) + -- | In addition to pattern matches, branches may be subdivided by 'Guard' clauses, + -- which are conditional expressions. + -- If no guard clause is true, then further case patterns will be tried. + -- (This is different from the behavior of Haskell, which can only match one pattern, + -- and will fail due to non-exhaustive patterns if none of the guards match.) + -- + -- > { + -- > One x ? isGood x → foo ; // if `x` is good + -- > Two y ? isBad y → bar // if `y` is bad + -- > ? else → baz ; // if `y` is /not/ bad + -- > Three z ? isFun z → quux ; // if `z` is fun + -- > _ → floo ; // if `x` is not good /or/ `z` is not fun + -- > } + -- + -- With regards to view patterns and guard fall-through, + -- patterns and guard clauses will always be evaluated top-to-bottom, + -- to the extent that the behavior of the program would be affected + -- by the order of evaluation for guard clauses. + -- + -- This syntax is based on Haskell's guard clauses. + | Guards [Pattern] (NonEmpty (Guard, Expr)) + -- | A case branch without refinement or guards. + | PlainCase [Pattern] Expr + deriving Show -data ASTXF r - -- | A let expression where the definitions may reference each other recursively. - = LetRecP_ !(Text, r) !r - -- | A natural number literal, e.g. `10`. - | PNat_ Int - -- | A list literal, e.g. `[x, y, z]`. - | PList_ [r] - -- | A character literal, e.g. `'a`. - | PChar_ Char - -- | A string literal, e.g. `"abcd"`. - | PStr_ Text - -- | A type hole. - | HoleP_ - deriving (Eq, Functor, Foldable, Traversable, Show) +-- | A guard is a kind of syntax for conditionals used as part of a 'CaseBranch'. +-- +-- A guard is a sequence of plain pattern matches (/no/ plus or guard clauses) +-- against expressions /or/ expressions with a truth value (e.g. booleans)¹. +-- A guard returns true only if all patterns match and all expressions are truthy. +-- +-- For example: +-- +-- > ? x ← foo | y ← bar | isNice x && isNaughty y +-- +-- Or as a simpler example: +-- +-- > ? isCool foo +-- +-- ¹: I might add a typeclass for 'truthiness' to allow using guard clause syntax. +-- Or it'll just be hardcoded for use by booleans; I'm not sure. +type Guard = [Either (Pattern, Expr) Expr] -instance RecursivePhase Parse where - projectLetArgs = NonEmptyDefFs - embedLetArgs = getNonEmptyDefFs +-- | Special syntactic support for a few very important types. +data Lit r + -- | @12310@, @+1338848@, @-131@ + = LitInt Integer + -- | @'a@, @'#@, @'\n@, + | LitChar Char + -- | @"And so I said, \"this language SUCKS!\""@ + | LitStr Text + -- | @[ x ; y ; z ]@, @[ ]@ + -- + -- Note that to minimize the number of reserved characters, + -- (so that e.g. @[]@ can be a constructor name and not just syntax) + -- square brackets are /keywords/ and not /tokens/, + -- so e.g. in @[x ; y]@, @[x@ and @y]@ are /identifiers/. + -- You /must/ leave a space: @[ x ; y ]@. + -- + -- This uses 'FlexibleSeparatorSyntax'. + | LitList [r] + deriving Show -newtype NonEmptyDefFs r = NonEmptyDefFs { getNonEmptyDefFs :: NonEmpty (Text, r) } - deriving (Eq, Functor, Foldable, Traversable, Show) - -pattern LetFP :: NonEmpty (Text, r) -> r -> ASTF r -pattern LetFP ds e = LetF (NonEmptyDefFs ds) e - -pattern LetRecP :: (Text, AST) -> AST -> AST -pattern LetRecP d e = ExprX (LetRecP_ d e) - -pattern LetRecFP :: (Text, r) -> r -> ASTF r -pattern LetRecFP d e = ExprXF (LetRecP_ d e) - -pattern PNat :: Int -> AST -pattern PNat n = ExprX (PNat_ n) - -pattern PNatF :: Int -> ASTF r -pattern PNatF n = ExprXF (PNat_ n) - -pattern PList :: [AST] -> AST -pattern PList es = ExprX (PList_ es) - -pattern PListF :: [r] -> ASTF r -pattern PListF es = ExprXF (PList_ es) - -pattern PChar :: Char -> AST -pattern PChar c = ExprX (PChar_ c) - -pattern PCharF :: Char -> ASTF r -pattern PCharF c = ExprXF (PChar_ c) - -pattern PStrF :: Text -> ASTF r -pattern PStrF s = ExprXF (PStr_ s) - -pattern PStr :: Text -> AST -pattern PStr s = ExprX (PStr_ s) - -pattern HoleP :: AST -pattern HoleP = ExprX HoleP_ - -pattern HoleFP :: ASTF r -pattern HoleFP = ExprXF HoleP_ - -{-# COMPLETE VarF, AppF, AbsF, LetFP, CtrF, CaseF, AnnF, ExprXF #-} -{-# COMPLETE Var, App, Abs, Let, Ctr, Case, Ann, LetRecP, PNat, PList, PChar, PStr, HoleP #-} -{-# COMPLETE VarF, AppF, AbsF, LetF , CtrF, CaseF, AnnF, LetRecFP, PNatF, PListF, PCharF, PStrF, HoleFP #-} -{-# COMPLETE VarF, AppF, AbsF, LetFP, CtrF, CaseF, AnnF, LetRecFP, PNatF, PListF, PCharF, PStrF, HoleFP #-} - --- TODO: Implement Substitutable for AST. - --- | Combine nested expressions into compound expressions or literals when possible. -simplify :: AST -> AST -simplify = simplify' . embed . fmap simplify . project - where - -- Combine sequences of nat constructors into literals. - simplify' (Ctr CZero []) = PNat 0 - simplify' (Ctr CSucc [PNat n]) = PNat (n + 1) - -- Combine sequences of string constructors into string literals. - simplify' (Ctr CChar [PNat n]) = PChar (toEnum n) - simplify' o@(Ctr CCons [PChar c, PList []]) - | c /= '"' = PStr (T.singleton c) - | otherwise = o - simplify' o@(Ctr CCons [PChar c, PStr cs]) - | c /= '"' = PStr (T.cons c cs) - | otherwise = o - -- Combine sequences of list contructors into list literals. - simplify' (Ctr CNil []) = PList [] - simplify' (Ctr CCons [x, PList xs]) = PList (x : xs) - -- Move applications into constructors. - simplify' (App (Ctr ctr es1) es2) = simplify' $ Ctr ctr (es1 <> toList es2) - -- Combine reducible applications into let expressions. - simplify' (App (Abs (nx :| ns) eb) (ex :| es)) = simplify' $ app' es $ Let ((nx, ex) :| []) $ abs' ns eb - where app' [] e = e - app' (ex2:es2) e = App e (ex2 :| es2) - - abs' [] e = e - abs' (nx2:ns2) e = Abs (nx2 :| ns2) e - -- Combine sequences of nested applications into n-ary applications. - simplify' (App (App f es1) es2) = simplify' $ App f (es1 <> es2) - -- Combine sequences of nested abstractions into n-argument abstractions. - simplify' (Abs ns1 (Abs ns2 e)) = simplify' $ Abs (ns1 <> ns2) e - -- Combine sequences of nested let expressions into n-definition let expressions. - simplify' (Let ds1 (Let ds2 e)) = simplify' $ Let (ds1 <> ds2) e - simplify' e = e +-- | When the languages has things within brackets separated by a divider, +-- that divider will usually be allowed redundantly both preceding and trailing. +-- +-- This allows for more flexible layouts, for example with 'CaseBranch': +-- +-- > { +-- > ; +-- > ; +-- > } +-- > +-- > { +-- > ; +-- > ; +-- > } +-- > +-- > { +-- > ; +-- > } +-- +-- The benefit of trailing or preceding dividers for multi-line things like this +-- is that it allows for uniformity, which looks good, and additionally, +-- it means that re-ordering elements is as simple as copy-pasting the +-- entire line, without having to add or remove dividers anywhere. +-- (Most programmers probably already know about this trick.) +-- +-- This sort of syntax occurs in many places, e.g.: +-- 'ADT', 'GADT', 'Sig', 'Mod', 'CaseBranch', 'Use', and 'Let'. +-- +-- This data type isn't actually used for anything; +-- it's just so that I can link to this explanation in Haddock. +data FlexibleSeparatorSyntax diff --git a/src/Ivo/Syntax/Parser.hs b/src/Ivo/Syntax/Parser.hs index 1d21c15..d81c8d6 100644 --- a/src/Ivo/Syntax/Parser.hs +++ b/src/Ivo/Syntax/Parser.hs @@ -1,305 +1,604 @@ module Ivo.Syntax.Parser ( ParseError, parse - , Declaration, TopLevelAST, ProgramAST - , parseAST, parseTopLevel, parseProgram - , typeParser, schemeParser, astParser, topLevelParser, programParser + , programParser, replParser ) where import Ivo.Syntax.Base -import Data.Functor.Identity (Identity) import Data.List.NonEmpty (fromList) import Data.Text qualified as T import Prelude hiding (succ, either) -import Text.Parsec hiding (label, token, spaces) +import Text.Parsec hiding (label, token, spaces, tokens, anyToken) import Text.Parsec qualified -import Text.Parsec.Expr import Text.Parsec.Text (Parser) -spaces :: Parser () -spaces = Text.Parsec.spaces >> optional (try (comment >> spaces)) - where - comment, lineComment, blockComment :: Parser () - comment = blockComment <|> lineComment - lineComment = label "line comment" $ do - _ <- try (string "//") - _ <- many1 (noneOf "\n") - pure () - blockComment = label "block comment" $ do - _ <- try (string "/*") - _ <- many1 $ notFollowedBy (string "*/") >> anyChar - _ <- string "*/" - pure () +-- | Parse the contents of a file containing Ivo source code, +-- with an optional shebang. +-- +-- See 'Scope'. +programParser :: Parser Scope +programParser = do + optional shebang + scope -label :: String -> Parser a -> Parser a -label = flip Text.Parsec.label +-- | Parse REPL input (which consists of top-level statements and expressions). +-- +-- See 'TopLevel' and 'Expr'. +replParser :: Parser [Either TopLevel Expr] +replParser = sepEndBy ((Left <$> topLevel) <|> (Right <$> ambiguous)) (token ";") -token :: Char -> Parser () -token ch = label [ch] $ char ch *> spaces - -keywords :: [Text] -keywords = ["let", "in", "Left", "Right", "S", "Z", "forall", "Char", "Void", "Unit", "Nat", "Char"] - --- | A keyword is an exact string which is not part of an identifier. -keyword :: Text -> Parser () -keyword kwd = label (T.unpack kwd) $ do - try do - _ <- string $ T.unpack kwd - notFollowedBy letter - spaces - --- | An identifier is a sequence of letters which is not a keyword. -identifier :: Parser Text -identifier = label "identifier" $ do - notFollowedBy anyKeyword - T.pack <$> (many1 letter <* spaces) - where anyKeyword = choice $ map keyword keywords - -variable :: Parser AST -variable = label "variable" $ Var <$> identifier - -tvariable :: Parser Type -tvariable = label "variable" $ TVar <$> identifier - -many1' :: Parser a -> Parser (NonEmpty a) -many1' p = fromList <$> many1 p - -many2 :: Parser a -> Parser (a, NonEmpty a) -many2 p = (,) <$> p <*> many1' p - -grouping :: Parser AST -grouping = label "grouping" $ between (token '(') (token ')') ambiguous - -tgrouping :: Parser Type -tgrouping = label "grouping" $ between (token '(') (token ')') tambiguous - -application :: Parser AST -application = label "application" $ uncurry App <$> many2 block - -tapplication :: Parser Type -tapplication = label "application" $ uncurry tapp' <$> many2 tblock - where tapp' t1 (t2 :| ts) = tapp (t1 : t2 : ts) - -abstraction :: Parser AST -abstraction = label "lambda abstraction" $ Abs <$> between lambda (token '.') (many1' identifier) <*> ambiguous - where lambda = label "lambda" $ (char '\\' <|> char 'λ') *> spaces - -definition :: Parser (Def Parse) -definition = label "definition" $ do - name <- identifier - token '=' - value <- ambiguous - pure (name, value) - -let_ :: Parser AST -let_ = label "let expression" $ letrecstar <|> letstar - where - letrecstar = LetRecP <$> between (try (keyword "letrec")) (keyword "in") definition <*> ambiguous - letstar = Let <$> between (keyword "let") (keyword "in") definitions <*> ambiguous - - definitions :: Parser (NonEmpty (Def Parse)) - definitions = fromList <$> sepBy1 definition (token ';') - -ctr :: Parser AST -ctr = label "data constructor" $ pair <|> unit <|> either <|> nat <|> list <|> str - where - unit, pairCtr, tuple, either, left, right, - zero, succ, natLit, consCtr, cons, charCtr, charLit, strLit :: Parser AST - unit = Ctr CUnit [] <$ keyword "()" - pair = pairCtr <|> tuple - pairCtr = Ctr CPair [] <$ keyword "(,)" - tuple = try $ between (token '(') (token ')') do - e1 <- ambiguous - token ',' - e2 <- ambiguous - pure $ Ctr CPair [e1, e2] - either = left <|> right - left = Ctr CLeft [] <$ keyword "Left" - right = Ctr CRight [] <$ keyword "Right" - nat = zero <|> succ <|> natLit - zero = Ctr CZero [] <$ keyword "Z" - succ = Ctr CSucc [] <$ keyword "S" - natLit = (PNat . read <$> many1 digit) <* spaces - list = cons <|> consCtr <|> listLit - consCtr = Ctr CCons [] <$ keyword "(::)" - cons = try $ between (token '(') (token ')') do - e1 <- ambiguous - keyword "::" - e2 <- ambiguous - pure $ Ctr CCons [e1, e2] - listLit = fmap PList $ between (token '[') (token ']') $ sepEndBy ambiguous (token ',') - str = charCtr <|> charLit <|> strLit - charCtr = Ctr CChar [] <$ keyword "Char" - charLit = fmap PChar $ char '\'' *> anyChar <* spaces - strLit = fmap (PStr . T.pack) $ between (token '"') (token '"') $ many (noneOf "\"") - -pat :: Parser (Pat Parse) -pat = label "case alternate" $ do - (c, ns) <- label "pattern" $ - pair <|> unit <|> left <|> right <|> zero <|> succ <|> nil <|> cons <|> char' - keyword "->" - e <- ambiguous - pure $ Pat c ns e - where - pair = try $ between (token '(') (token ')') do - e1 <- identifier - token ',' - e2 <- identifier - pure (CPair, [e1, e2]) - unit = (CUnit, []) <$ keyword "()" - left = do - keyword "Left" - e <- identifier - pure (CLeft, [e]) - right = do - keyword "Right" - e <- identifier - pure (CRight, [e]) - zero = (CZero, []) <$ keyword "Z" - succ = do - keyword "S" - e <- identifier - pure (CSucc, [e]) - nil = (CNil, []) <$ keyword "[]" - cons = try $ between (token '(') (token ')') do - e1 <- identifier - keyword "::" - e2 <- identifier - pure (CCons, [e1, e2]) - char' = do - keyword "Char" - e <- identifier - pure (CChar, [e]) - -case_ :: Parser AST -case_ = label "case patterns" $ do - token '{' - pats <- sepEndBy pat (token ';') - token '}' - pure $ Case pats - -ann :: Parser AST -ann = label "type annotation" $ do - e <- block - token ':' - t <- tambiguous - pure (Ann () e t) - -hole :: Parser AST -hole = label "hole" $ HoleP <$ token '_' - -tlist :: Parser Type -tlist = between (token '[') (token ']') $ ((TApp TList <$> tambiguous) <|> pure TList) - -tinfix :: Parser Type -tinfix = buildExpressionParser ttable tblock - where - ttable :: [[Operator Text () Identity Type]] - ttable = [ [Infix (binop TAbs <$ arrSym) AssocRight] - , [Infix (binop TProd <$ token '*') AssocRight] - , [Infix (binop TSum <$ token '+') AssocRight] - ] - - arrSym :: Parser () - arrSym = token '→' <|> keyword "->" - - binop :: Type -> Type -> Type -> Type - binop c t1 t2 = TApp (TApp c t1) t2 - -tctr :: Parser Type -tctr = tlist <|> tunit <|> tvoid <|> tnat <|> tchar - where - tunit = TUnit <$ (keyword "Unit" <|> keyword "⊤") - tvoid = TVoid <$ (keyword "Void" <|> keyword "⊥") - tnat = TNat <$ (keyword "Nat" <|> keyword "ℕ") - tchar = TChar <$ keyword "Char" - -tfinite :: Parser Type -tfinite = tvariable <|> tlist <|> tctr <|> tgrouping - -tblock :: Parser Type -tblock = tfinite - -tambiguous :: Parser Type -tambiguous = try tinfix <|> try tapplication <|> tblock - -tforall :: Parser Scheme -tforall = do - keyword "forall" <|> token '∀' - names <- many1 (identifier <* spaces) - token '.' - ty <- tambiguous - pure $ TForall names ty - -scheme :: Parser Scheme -scheme = tforall <|> (TForall [] <$> tambiguous) - --- | Guaranteed to consume a finite amount of input -finite :: Parser AST -finite = label "finite expression" $ variable <|> hole <|> ctr <|> case_ <|> grouping - --- | Guaranteed to consume input, but may continue until it reaches a terminator -block :: Parser AST -block = label "block expression" $ abstraction <|> let_ <|> finite - --- | Not guaranteed to consume input at all, may continue until it reaches a terminator -ambiguous :: Parser AST -ambiguous = label "any expression" $ try ann <|> try application <|> block - -typeParser :: Parser Type -typeParser = tambiguous - -schemeParser :: Parser Scheme -schemeParser = scheme - -astParser :: Parser AST -astParser = ambiguous - -parseAST :: Text -> Either ParseError AST -parseAST = parse (spaces *> ambiguous <* eof) "input" - -type Declaration = (Text, Maybe Type, AST) - -definitionAnn :: Parser Declaration -definitionAnn = do - name <- identifier - ty <- optionMaybe $ token ':' *> tambiguous - token '=' - e <- ambiguous - pure (name, ty, e) - -declaration :: Parser Declaration -declaration = notFollowedBy (try let_) >> (declrec <|> decl) - where - declrec = do - try $ keyword "letrec" - (name, ty, expr) <- definitionAnn - pure (name, ty, LetRecP (name, expr) (Var name)) - decl = do - keyword "let" - definitionAnn - --- | A program is a series of declarations and expressions to execute. -type ProgramAST = [Declaration] -type TopLevelAST = [Either Declaration AST] - -topLevel :: Parser (Either Declaration AST) -topLevel = try (Left <$> declaration) <|> (Right <$> ambiguous) - -topLevelParser :: Parser TopLevelAST -topLevelParser = spaces *> sepEndBy topLevel (token ';') <* eof +--- +--- This is a very long module which does not even document the +--- syntactic constructs which it is implementing. +--- On the other hand, 'Ivo.Syntax.Base' goes into great detail' +--- about the syntax while hardly containing any actual code. +--- Thus, to assist with navigation, the order +--- in which parsers are defined in this module +--- is the same as the order in which they are described in 'Ivo.Syntax.Base'. +--- I would recommend keeping both files open at once while working on this code +--- so you can cross-reference the informal description of the syntax +--- with the parsers. +--- shebang :: Parser () -shebang = do +shebang = label "shebang" do try $ keyword "#!" skipMany (noneOf "\n") spaces -programParser :: Parser ProgramAST -programParser = shebang *> sepEndBy declaration (token ';') <* eof +-- | 'Scope' +scope :: Parser Scope +scope = Scope <$> many (topLevel <* token ";") -parseTopLevel :: Text -> Either ParseError TopLevelAST -parseTopLevel = parse topLevelParser "input" +topLevel, tlOpen, tlDef, tlData, tlAxiom, tlSig :: Parser TopLevel -parseProgram :: Text -> Either ParseError ProgramAST -parseProgram = parse programParser "input" +-- | 'TopLevel' +topLevel = tlOpen <|> tlDef <|> tlData <|> tlAxiom <|> tlSig + +-- | 'Open' +tlOpen = label "open statement" $ + Open <$> publicOrPrivate + <*> ambiguous + <*> openUsing + <*> openHiding + <*> openRenaming + where + openUsing, openHiding :: Parser [(Text, Maybe (Maybe [Text]))] + openRenaming :: Parser [(Text, Text)] + + openUsing = option [] do + keyword "using" + flexibleParens openId + + openHiding = option [] do + keyword "hiding" + flexibleParens openId + + openId = do + name <- identifier + ctrs <- optionMaybe $ openAll <|> openCtrs + pure (name, ctrs) + + openRenaming = option [] do + keyword "renaming" + flexibleParens $ cobetween identifier tkArr identifier + + openAll, openCtrs :: Parser (Maybe [Text]) + + openAll = Nothing <$ keyword ".." + + openCtrs = optionMaybe $ flexibleParens identifier + +-- | 'Define' +tlDef = Define <$> publicOrPrivate <*> definition + +-- | 'TLData' +tlData = TLData <$> data_ + +-- | 'TLAxiom' +tlAxiom = TLAxiom <$> axiom + +-- | 'TLSig' +tlSig = TLSig <$> sig + +-- | 'PublicOrPrivate' +publicOrPrivate :: Parser PublicOrPrivate +publicOrPrivate = (Public <$ keyword "pub") <|> pure Private + +-- | 'Definition' +definition :: Parser Definition +definition = label "definition" do + name <- identifier + ty <- optionMaybe typeSig + token "=" + expr <- ambiguous + pure $ Definition name ty expr + +ambiguous, block, finite :: Parser Expr +-- | 'Expr's which are not guaranteed to consume input before recursing. +ambiguous = label "expression" $ + exAccess <|> exArrow <|> exAnn <|> exAppI <|> exApp <|> exTypeApp + +-- | 'Expr's with a terminal symbol on its left. +block = label "block expression" $ + exData <|> exAxiom <|> exForall <|> exTypeLam <|> exBlockLam <|> + exLet <|> exBlockCase <|> finite + +-- | 'Expr's with a terminal symbol on both the left /and/ right. +finite = label "finite expression" $ + exSig <|> exMod <|> exHole <|> exFiniteCase <|> exFiniteLam <|> + exLit <|> exVar <|> exGroup + +exAccess, exArrow, exAnn, exAppI, exApp, exTypeApp, + exData, exAxiom, exForall, exTypeLam, exBlockLam, exFiniteLam, exLet, + exSig, exMod, exHole, exBlockCase, exFiniteCase, exLit, exVar, exGroup + :: Parser Expr + +exGroup = label "parenthetical expression" $ betweenParens ambiguous + +typeSig, kindSig, modSig :: Parser Type + +typeSig = label "type signature" $ keyword ":" *> ambiguous + +kindSig = label "kind signature" typeSig + +modSig = label "signature" typeSig + +-- | 'Data' +exData = Data <$> data_ + +data_ :: Parser Expr +data_ = label "data expression" do + keyword "data" + ambiguous + +-- | 'Axiom' +exAxiom = Axiom <$> axiom + +axiom :: Parser Expr +axiom = label "axiom expression" do + keyword "axiom" + ambiguous + +-- | 'SigE' +exSig = SigE <$> sig + +-- | 'Mod' +exMod = label "module expression" do + keyword "mod" + params <- sigParams + sig <- optionMaybe modSig + body <- flexibleBraces topLevel + pure $ Mod params sig body + +-- | 'Access' +exAccess = label "field access" $ try do + expr <- block + tkColCol + name <- identifier + pure $ Access expr name + +-- | @∷@ +tkColCol :: Parser () +tkColCol = label "∷" $ token "∷" <|> token "::" + +-- | 'Forall' +exForall = label "∀ type" $ do + tkForall + uncurry Forall <$> typeBinder + +-- | @∀@ +tkForall :: Parser () +tkForall = label "∀" $ keyword "∀" <|> keyword "forall" + +-- | Everything about @forall@ and @^@ is the same +-- except for the token and whether a type or expr +-- (which are also syntactically the same object) +-- comes next. +-- +-- This factors out all of that stuff they have in common. +typeBinder :: Parser (NonEmpty (Either (NonEmpty Text, Kind) Text), Expr) +typeBinder = do + vars <- many1' typeBinder' + tkArr + ty <- ambiguous + pure (vars, ty) + where + typeBinder', kindedBinder, plainBinder + :: Parser (Either (NonEmpty Text, Kind) Text) + + typeBinder' = kindedBinder <|> plainBinder + + kindedBinder = betweenParens $ do + names <- many1' identifier + kind <- kindSig + pure $ Left (names, kind) + + plainBinder = Right <$> identifier + +-- | 'Arrow' +exArrow = label "→ type" $ try do + (names, argTy) <- arrowArgs + tkArr + rTy <- ambiguous + pure $ Arrow names argTy rTy + + where + arrowArgs, namedArgs, typeArg :: Parser ([Text], Type) + + arrowArgs = namedArgs <|> typeArg + + namedArgs = betweenParens do + names <- many1 identifier + ty <- kindSig + pure (names, ty) + + typeArg = ([],) <$> block + +-- | @→@ +tkArr :: Parser () +tkArr = label "→" $ keyword "→" <|> keyword "->" + +-- | 'Ann' +exAnn = label "annotated expression" $ try do + expr <- block + ty <- typeSig + pure $ Ann expr ty + +-- | 'Hole' +exHole = label "hole" $ Hole <$ keyword "_" + +-- | 'TypeLam' +exTypeLam = label "type Λ" $ do + tkBigLam + uncurry TypeLam <$> typeBinder + +-- | @Λ@ +tkBigLam :: Parser () +tkBigLam = label "Λ" $ keyword "Λ" <|> keyword "^" + +-- | 'TypeApp' +exTypeApp = label "type application" $ try do + expr <- block + token "@" + arg <- ambiguous + pure $ TypeApp expr arg + +-- | 'Lam' with only one undelimited case branch +exBlockLam = label "λ expression" $ try do + args <- many pattern_ + tkArr + body <- ambiguous + pure $ LamBlock args body + +-- | 'Lam' with delimited case branches +exFiniteLam = label "λ-case expression" $ try do + simpleArgs <- many pattern_ + body <- caseBranches + pure $ Lam simpleArgs body + +tkLam :: Parser () +tkLam = label "λ" $ keyword "λ" <|> keyword "\\" + +-- | 'App' +exApp = label "application" $ try do + ef <- finite + exs <- many finite + exfinal <- block + pure $ App ef (snoc' exs exfinal) + +-- | 'AppI' +exAppI = label "infix application" $ + fail "infix expressions not yet supported" + +-- | 'Let' +exLet = label "let expression" do + keyword "let" + decls <- flexibleSepBy1 (token ";") topLevel + keyword "in" + body <- ambiguous + pure $ Let decls body + +-- | 'Case' with only one undelimited case branch +exBlockCase = label "case expression" $ try do + keyword "case" + exprs <- flexibleSepBy (keyword "|") ambiguous + body <- caseBranches + pure $ Case exprs body + +-- | 'Case' +exFiniteCase = label "case-of expression" $ try do + keyword "case" + exprs <- flexibleSepBy1 (keyword "|") ambiguous + keyword "of" + pats <- flexibleSepBy1 (keyword "|") pattern_ + tkArr + body <- ambiguous + pure $ CaseBlock exprs pats body + +-- | 'Lit' +exLit = label "literal" $ Lit <$> lit ambiguous + +-- | 'Var' +exVar = label "variable" $ Var <$> identifier + +-- | 'Sig' +sig, sigADT, sigGADT, sigSig :: Parser Sig +sig = label "signature" $ sigADT <|> sigGADT <|> sigSig + +sigParams :: Parser [Either (NonEmpty Text, Kind) Text] +sigParams = label "signature parameters" $ many $ annParam <|> unannParam + where + annParam, unannParam :: Parser (Either (NonEmpty Text, Kind) Text) + + annParam = betweenParens do + names <- many1' identifier + kind <- kindSig + pure $ Left (names, kind) + + unannParam = Right <$> identifier + +-- | 'ADT' +sigADT = label "adt" do + keyword "adt" + name <- identifier + params <- sigParams + kind <- optionMaybe kindSig + (codataQ, body) <- betweenBraces $ + emptyData <|> emptyCodata <|> dataCtrs <|> codataElims + pure $ SigQs params $ ADT codataQ name kind body + where + emptyData, emptyCodata, dataCtrs, codataElims + :: Parser (Bool, [(Text, [Type])]) + + emptyData = try $ (False, []) <$ keyword "+" + emptyCodata = try $ (True, []) <$ keyword "&" + + dataCtrs = try $ (False,) <$> flexibleSepBy1 (keyword "+") do + name <- identifier + ctrs <- many $ notFollowedBy (keyword "+") >> finite + pure (name, ctrs) + + codataElims = try $ (True,) <$> flexibleSepBy1 (keyword "&") do + name <- identifier + elims <- many $ notFollowedBy (keyword "&") >> finite + pure (name, elims) + +-- | 'GADT' +sigGADT = label "gadt" do + keyword "gadt" + name <- identifier + params <- sigParams + kind <- optionMaybe kindSig + ctrs <- flexibleBraces declaration + pure $ SigQs params $ GADT name kind ctrs + +-- | 'Sig' +sigSig = label "sig" do + keyword "sig" + params <- sigParams + ctrs <- flexibleBraces $ (Left <$> declaration) <|> (Right <$> topLevel) + pure $ SigQs params $ Sig ctrs + +declaration :: Parser (NonEmpty Text, Type) +declaration = label "declaration" $ (,) <$> many1' identifier <*> typeSig + +pattern_, finitePattern, patVar, patIrr, patImp, patApp, + patAppI, patView, patLit, patGroup :: Parser Pattern + +-- | 'Pattern' +pattern_ = label "pattern" $ + patApp <|> patAppI <|> patView <|> finitePattern + +finitePattern = + patIrr <|> patImp <|> patLit <|> patVar <|> patGroup + +patGroup = betweenParens pattern_ + +-- | 'PatVar' +patVar = label "pattern variable" $ PatVar <$> identifier <*> optionMaybe typeSig + +-- | 'Irrelevant' +patIrr = label "irrelevant pattern" $ Irrelevant <$ keyword "_" + +-- | 'Impossible' +patImp = label "impossible pattern" $ Impossible <$ keyword "!" + +-- | 'PatApp' +patApp = label "pattern application" $ do + name <- identifier + pats <- many1 finitePattern + pure $ PatApp name pats + +-- FIXME: Implement infix pattern application +-- | 'PatAppI' +patAppI = label "infix pattern application" $ + fail "infix pattern application not yet implemented" + +-- | 'View' +patView = label "view pattern" $ try $ View <$> block <*> (tkArr *> pattern_) + +-- | 'PatLit' +patLit = label "literal pattern" $ PatLit <$> lit pattern_ + +caseBranches :: Parser [CaseBranch] +caseBranches = label "case branches" $ flexibleBraces caseBranch + +caseBranch :: Parser CaseBranch +caseRefinement, caseGuards, casePlain :: [Pattern] -> Parser CaseBranch + +-- | 'CaseBranch' +caseBranch = label "case branch" do + pats <- many1 pattern_ + caseRefinement pats <|> caseGuards pats <|> casePlain pats + +-- FIXME: Implement case refinement +-- | 'Refinement' +caseRefinement pats = label "case refinement" $ + fail "case refinement not implemented" + +-- FIXME: Implement case guards +-- | 'Guards' +caseGuards pats = label "guard clauses" $ + fail "case guards not yet implemented" + +-- | 'PlainCase' +casePlain pats = do + tkArr + PlainCase pats <$> ambiguous + +-- FIXME: Implement guard clauses +-- | 'Guard' +guard :: Parser Guard +guard = label "guard clause" $ + fail "guard clauses not yet implemented" + +lit, litList :: Parser a -> Parser (Lit a) +litInt, litChar, litStr :: Parser (Lit a) + +-- | 'Lit' +lit m = label "literal" $ litInt <|> litChar <|> litStr <|> litList m + +-- | 'LitInt' +litInt = label "integer literal" $ LitInt . read <$> try do + sign <- optionMaybe $ char '+' <|> char '-' + digits <- many1 digit + pure $ maybe digits (: digits) sign + +-- | 'LitChar' +litChar = label "character literal" $ + fmap LitChar $ between (char '\'') (char '\'') $ noneOf "\\'\n" + +-- | 'LitStr' +litStr = label "string literal" $ fmap (LitStr . T.pack) $ + between (char '"') (char '"') $ many1 $ noneOf "\\\"\n" + +-- | 'LitList' +litList m = label "list literal" $ LitList <$> flexibleBrackets m + +-- | 'FlexibleSeparatorSyntax' +flexibleSepBy :: Parser a -> Parser b -> Parser [b] +flexibleSepBy delim m = do + optional delim + sepEndBy m delim + +-- | 'FlexibleSeparatorSyntax' +flexibleSepBy1 :: Parser a -> Parser b -> Parser [b] +flexibleSepBy1 delim m = do + optional delim + sepEndBy1 m delim + +flexibleParens, flexibleBrackets, flexibleBraces :: Parser a -> Parser [a] + +-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @(@ @)@ +flexibleParens = betweenParens . flexibleSepBy (token ";") + +-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @[@ @]@ +flexibleBrackets = betweenBrackets . flexibleSepBy (token ";") + +-- | 'FlexibleSeparatorSyntax', with the @;@ separator between @{@ @}@ +flexibleBraces = betweenBraces . flexibleSepBy (token ";") + +--- +--- Helper parsers which aren't explicitly specified by the AST +--- + +-- | An identifier is a sequence of characters with no token which is not a keyword. +identifier :: Parser Text +identifier = label "identifier" $ try do + notFollowedBy anyKeyword + name <- T.pack <$> (notFollowedBy anyToken *> many1 anyChar <* spaces) + if any ((`elem` keywords) . T.unpack) $ T.splitOn "_" name + then fail "identifier contained keywords" + else pure name + +token :: String -> Parser () +token tk = label tk $ string tk *> spaces + +anyToken :: Parser () +anyToken = label "any token" $ choice $ map token tokens + +-- | Tokens which are reserved by syntax +-- and may not appear /anywhere/ in an identifier. +tokens :: [String] +tokens = + [ "//", "/*", "*/" + , "(", ")", "{", "}" + , ";", "∷", "::", "@" + , " ", "\r", "\n", "\t" + , "```" + ] + +-- | A keyword is an exact string which is not part of an identifier. +keyword :: String -> Parser () +keyword kwd = label kwd $ do + try do + _ <- string kwd + notFollowedBy (notFollowedBy anyToken >> anyChar) + spaces + +anyKeyword :: Parser () +anyKeyword = label "any keyword" $ choice $ map keyword keywords + +-- | Keywords are reserved by syntax and are not allowed to be used as identifiers +-- or as a part of a `_`-separated identifier. +-- +-- However, they may be used as /part/ of an identifier. +keywords :: [String] +keywords = + [ "open", "using", "hiding", "renaming", "..", "pub" + , "data", "axiom", "adt", "gadt", "sig", "mod" + , "forall" + , "let", "in", "case" + , "[", "]" + , ":", "∀", "Λ", "^", "λ", "\\", "→", "->" + , "|", "?", "!", "_", "..." + ] + +spaces :: Parser () +spaces = label "whitespace or comments" $ + Text.Parsec.spaces >> optional (try (comment >> spaces)) + +comment, lineComment, blockComment :: Parser () + +comment = label "comment" $ lineComment <|> blockComment + +lineComment = label "line comment" $ do + _ <- try (string "//") + _ <- many1 (noneOf "\n") + pure () + +blockComment = label "block comment" $ do + _ <- try (string "/*") + _ <- many1 $ notFollowedBy (string "*/") >> anyChar + _ <- string "*/" + pure () + +-- | The opposite of `between`: ignore the middle, keep the sides +cobetween :: Parser a -> Parser b -> Parser c -> Parser (a, c) +cobetween a b c = do + x <- a + _ <- b + y <- c + pure (x, y) + +-- the parens/brackets/braces word choice is totally meaningless +betweenParens, betweenBrackets, betweenBraces :: Parser a -> Parser a + +-- | Between @(@ @)@ +betweenParens = between (token "(") (token ")") + +-- | Between @[@ @]@ +betweenBrackets = between (keyword "[") (keyword "]") + +-- | Between @{@ @}@ +betweenBraces = between (keyword "{") (keyword "}") + +many1' :: Parser a -> Parser (NonEmpty a) +many1' = fmap fromList . many1 + +snoc' :: [a] -> a -> NonEmpty a +snoc' xs x = fromList $ xs ++ [x] + +label :: String -> Parser a -> Parser a +label = flip Text.Parsec.label diff --git a/src/Ivo/Syntax/Printer.hs b/src/Ivo/Syntax/Printer.hs index 3cb3b91..e8f2de2 100644 --- a/src/Ivo/Syntax/Printer.hs +++ b/src/Ivo/Syntax/Printer.hs @@ -1,4 +1,7 @@ -module Ivo.Syntax.Printer (unparseAST, unparseType, unparseScheme) where +-- | Turn abstract syntax into the corresponding concrete syntax. +-- +-- This is /not/ a pretty-printer; it uses minimal whitespace) +module Ivo.Syntax.Printer (unparseScope, unparseTopLevel, unparseExpr) where import Ivo.Syntax.Base @@ -10,6 +13,99 @@ import Data.Text.Lazy (fromStrict, toStrict, intercalate, unwords, singleton) import Data.Text.Lazy.Builder (Builder, fromText, fromLazyText, toLazyText, fromString) import Prelude hiding (unwords) +unparseScope :: Scope -> Text +unparseScope = unambiguous . scope + +unparseTopLevel :: TopLevel -> Text +unparseTopLevel = unambiguous . topLevel + +unparseExpr :: Expr -> Text +unparseExpr = unambiguous . expr + +type Unparser a = a -> Tagged Builder + +scope :: Unparser Scope +scope (Scope tls) = tag Ambiguous $ unmany (topLevel Text -> Unparser a +u u x <> fromText txt + +topLevel :: Unparser TopLevel +topLevel = \case + Open pub expr use hide rename -> + publicOrPrivate pub <> + unambiguousExpr expr <> " " <> + openUsing use <> " " + openHiding hide <> " " + openRenaming rename + Define pub def -> + publicOrPrivate pub <> + definition def + TLData expr -> data_ expr + TLAxiom expr -> axiom expr + TLSig s -> sig s + +publicOrPrivate :: PublicOrPrivate -> Builder +publicOrPrivate Public = "pub " +publicOrPrivate Private = "" + +openUsing, openHiding :: [(Text, Maybe (Maybe [Text]))] -> Builder + +-- FIXME +openUsing _ = "using (some stuff) " + +-- FIXME +openHiding _ = "hiding (some stuff) " + +-- FIXME +openRenaming :: [(Text, Text)] +openRenaming _ = "renaming (some stuff) " + +definition :: Unparser Definition +definition (Definition name ty expr) = + fromText name <> " " <> typeSig ty <> "= " <> unambiguousExpr expr + +expr :: Unparser Expr +expr = \case + Data expr -> tag Block $ data_ expr + Axiom expr -> tag Block $ axiom expr + SigE s -> tag Block $ sig s + -- FIXME + Mod _ _ _ -> tag Block $ error "I don't know how to unparse modules" + Access expr name -> tag Ambiguous $ blockExpr expr <> "::" <> fromText name + Forall binders ty -> tag Block $ "∀ " <> typeBinders binders <> "→ " <> ambiguousExpr ty + Arrow [] arg ty -> tag Ambiguous $ blockExpr arg <> "→ " <> ambiguousExpr ty + Ann expr ty -> tag Ambiguous $ blockExpr expr <> ": " <> ambiguousExpr ty + Hole -> tag Finite "_" + TypeLam binders ty -> tag Block $ "Λ " <> typeBinders binders <> "→ " <> ambiguousExpr ty + TypeApp expr ty -> tag Ambiguous $ blockExpr <> "@" <> finiteExpr ty + Lam pats cases -> tag Block $ "λ " <> unmany1 pattern_ pats <> caseBranches cases + +data_ = undefined + +axiom = undefined + +typeBinders :: Unparser (NonEmpty (Either (NonEmpty Text, Kind) Text)) +typeBinders = error "Whatever" + +sig :: Unparser Sig +sig = error "I don't know how to unparse signatures" + +pattern_ :: Unparser Pattern +pattern_ = error "I don't know how to unparse patterns" + +caseBranch :: Unparser CaseBranch +caseBranch = error "I don't know how to unparse case branches" + +caseBranches :: Unparser [CaseBranch] +caseBranches = error "I don't know how to unparse case branches" + +guard :: Unparser Guard +guard = error "I don't know how to unparse guard clauses" + +lit :: Unparser a -> Unparser (Lit a) +lit u = error "I don't know how to unparse literals" + -- I'm surprised this isn't in base somewhere. unsnoc :: NonEmpty a -> ([a], a) unsnoc = cata \case @@ -47,78 +143,13 @@ ambiguous :: Tagged Builder -> Builder ambiguous (Finite, t) = t ambiguous (_, t) = group t --- | Turn an abstract syntax tree into the corresponding concrete syntax. --- --- This is *not* a pretty-printer; it uses minimal whitespace. -unparseAST :: AST -> Text -unparseAST = toStrict . toLazyText . snd . cata \case - VarF name -> tag Finite $ fromText name - AppF ef exs -> unparseApp ef exs - AbsF names body -> tag Block $ - let names' = fromLazyText (unwords $ map fromStrict $ toList names) - in "λ" <> names' <> ". " <> unambiguous body - LetFP defs body -> tag Block $ "let " <> unparseDefs defs <> " in " <> unambiguous body - LetRecFP def body -> tag Block $ "letrec " <> unparseDef def <> " in " <> unambiguous body - CtrF ctr e -> unparseCtr ctr e - CaseF pats -> - let pats' = fromLazyText $ intercalate "; " $ map (toLazyText . unparsePat) pats - in tag Finite $ "{ " <> pats' <> " }" - AnnF () e t -> tag Ambiguous $ final e <> " : " <> fromText (unparseType t) - PNatF n -> tag Finite $ fromString $ show n - PListF es -> - let es' = fromLazyText $ intercalate ", " $ map (toLazyText . unambiguous) es - in tag Finite $ "[" <> es' <> "]" - PStrF s -> tag Finite $ "\"" <> fromText s <> "\"" - PCharF c -> tag Finite $ "'" <> fromLazyText (singleton c) - HoleFP -> tag Finite "_" - where - unparseApp :: Tagged Builder -> NonEmpty (Tagged Builder) -> Tagged Builder - unparseApp ef (unsnoc -> (exs, efinal)) - = tag Ambiguous $ foldr (\e es' -> ambiguous e <> " " <> es') (final efinal) (ef : exs) +ambiguousExpr, blockExpr, finiteExpr :: Expr -> Builder - unparseDef (name, val) = fromText name <> " = " <> unambiguous val - unparseDefs defs = fromLazyText (intercalate "; " $ map (toLazyText . unparseDef) $ toList defs) +ambiguousExpr = ambiguous . expr - unparseCtr :: Ctr -> [Tagged Builder] -> Tagged Builder - -- Fully-applied special syntax forms - unparseCtr CPair [x, y] = tag Finite $ "(" <> unambiguous x <> ", " <> unambiguous y <> ")" - unparseCtr CCons [x, y] = tag Finite $ "(" <> unambiguous x <> " :: " <> unambiguous y <> ")" - -- Partially-applied syntax forms - unparseCtr CUnit [] = tag Finite "()" - unparseCtr CPair [] = tag Finite "(,)" - unparseCtr CLeft [] = tag Finite "Left" - unparseCtr CRight [] = tag Finite "Right" - unparseCtr CZero [] = tag Finite "Z" - unparseCtr CSucc [] = tag Finite "S" - unparseCtr CNil [] = tag Finite "[]" - unparseCtr CCons [] = tag Finite "(::)" - unparseCtr CChar [] = tag Finite "Char" - unparseCtr ctr (x:xs) = unparseApp (unparseCtr ctr []) (x :| xs) +blockExpr = final . expr - unparsePat (Pat ctr ns e) - = unambiguous (unparseCtr ctr (map (tag Finite . fromText) ns)) <> " -> " <> unambiguous e +finiteExpr = unambiguous . expr --- HACK -pattern TApp2 :: Type -> Type -> Type -> Type -pattern TApp2 tf tx ty = TApp (TApp tf tx) ty - --- TODO: Improve these printers. -unparseType :: Type -> Text -unparseType (TVar name) = name -unparseType (TApp2 TAbs a b) = "(" <> unparseType a <> " -> " <> unparseType b <> ")" -unparseType (TApp2 TProd a b) = "(" <> unparseType a <> " * " <> unparseType b <> ")" -unparseType (TApp2 TSum a b) = "(" <> unparseType a <> " + " <> unparseType b <> ")" -unparseType (TApp TList a) = "[" <> unparseType a <> "]" -unparseType (TApp a b) = "(" <> unparseType a <> " " <> unparseType b <> ")" -unparseType TAbs = "(->)" -unparseType TProd = "(*)" -unparseType TSum = "(+)" -unparseType TUnit = "★" -unparseType TVoid = "⊥" -unparseType TNat = "Nat" -unparseType TList = "[]" -unparseType TChar = "Char" - -unparseScheme :: Scheme -> Text -unparseScheme (TForall [] t) = unparseType t -unparseScheme (TForall names t) = "∀" <> T.unwords names <> ". " <> unparseType t +unmany :: Foldable t => Unparser a -> t a -> Builder +unmany u xs = error "fuck it, implement this later"