Of Zippers and types

or how to take back your data structure from the clutch of the typechecker using GADTs


Florian Angeletti
Inria Paris, Lambda Lille, February 11 2021

type 'a list =
  | []
  | (::): 'a * 'a list

type ('a,'hole) zipper = {left: 'a list ; hole:'hole; right:'a list}

let next z = match z.right with
  | [] -> None
  |  r :: rs ->
    Some { left = z.hole :: z.left; hole=r; right = rs }
let back z = match z.left with
  | [] -> None
  |  l :: ls ->
    Some { left = ls; hole=l; right = z.hole :: z.right }
let fold_step f z = match z.right with
  | [] -> None
  | a :: right ->
     Some { left = a :: z.left; hole = f z.hole a; right }

let fold_left f start l =
  let rec loop z = match fold_step f z with
      | None -> z.hole
      | Some z -> loop z
  in
  loop { left = [] ; hole = start; right = l }

Algebraic rules for zipper

Zipper(a + b) = Zipper(a) + Zipper (b)
Zipper(a * b) = Zipper(a) * b + a * Zipper(b)

Deriving a zipper for list

list(x) = 1 + x * list(x)
Zipper list x = Zipper x * list x + x * Zipper list x

Direct translation

type ('a,'h) zipper =
  | End: 'h * 'a list
  | Cons: 'a * 'a zipper

After simplification

type 'a zipper = {
  left: 'a list;
  hole:'a;
  right: 'a list;
}

Formal series

list x = 1 + x * list x
(1 - x) * list x = 1
list x = 1/(1-x)
Zipper(list) x = Zipper(x)/(1-x)^2
type 'a zipper = {
  left: 'a list;
  hole:'a;
  right: 'a list;
}
 type 'a t = Leaf of 'a | Node of 'a t * 'a t
type direction = Left | Right
 type 'a zipper = { path: (direction * 'a t) list; hole: 'a t }
Zipper(t) = Zipper(x) + 2 * t * Zipper(t)(x) 
type direction = Left | Right
type 'a zipper = { hole: 'a t; path: (direction * 'a t) list }
let left z = match z.hole with
  | Node (l,r) -> Some { hole = l; path = (Right,r) :: z.path }
  | Leaf _ -> None
let right z = match z.hole with
  | Node (l,r) -> Some { hole = r; path = (Left,l) :: z.path }
  | Leaf _ -> None
let up z = match z.path with
  | [] -> None
  | (Left, l) :: path ->
    Some { path; hole = Node(l,z.hole) }
  | (Right, r) :: path ->
    Some { path; hole = Node(z.hole,r) }

A small slice of OCaml module system

type name = string
type expr =
  | Open of module_
  | Bind of name * module_

and module_ =
 | Structure of expr list
 | Ident of name

How to compute the dependencies of ocaml module?


(* A.ml *)
module M = struct
  module C = struct
    module D = struct end
  end
end
module type T = B.T
open (M:T)
open C
open D
  • A is a submodule
  • B is an external module
  • (M:T) is ?
  • C can be either M.C or a compilation unit
  • D can be either M.C.D, C.D or a compilation unit
type step =
  | Open
  | Bind_left of module_
  | Bind_right of name

  | Str of {left:expr list; right:expr list}
  | Ident

type hole =
  | Name of name
  | Module of module_
  | Expr of expr

type zipper = { hole: hole; path: step list }

A representation similar to the one for homogeneous binary tree

  let up z = match z.hole, z.path with
  | _, [] -> z

  | Module m, Open :: path -> { hole = Expr (Open m); path }
  | _, Open :: path -> assert false

  | Name name, Bind_left m :: path -> { hole = Expr(Bind (name,m)); path }
  | _, Bind_left _m :: path -> assert false

  | Module m, Bind_right name :: path -> { hole = Expr(Bind (name,m)); path }
  | _,  Bind_right _name :: path -> assert false

  | Expr e, Str {left;right} :: path ->
    { hole = Module(Structure (List.rev_append left (e::right))); path }
  | _, Str _ :: path -> assert false

  | Name n, Ident :: path -> { hole = Module(Ident n); path }
  | _, Ident :: _ -> assert false
  • A lot of invalid states
    • Path elements can follow any other elements
       let invalid_path = [Ident; Ident]
      
    • The type of the hole is independent of the path
    •  let nonsense = { hole = Structure []; path = [Ident] }
      
  • Exhaustiveness checking is impossible
  • Lot of redundancy
type expr =
  | Open of module_
  | Bind of name * module_

and module_ =
 | Structure of expr list
 | Ident of name
Zipper(expr) =
   Open * Zipper(module_)
  + Bind * Zipper(name) * module_
  + Bind * name * Zipper(module_)

Zipper(module_) =
 Structure * list(expr) * Zipper(expr) * list(expr)
 + Ident * Zipper(name)
      Two class of types
    • a zipper type
    • a familly of path type for each final type of the path
      type zipper =
      | Name_hole of name * to_name
      | Module_hole of module_ * to_module
      | Expr_hole of expr * to_expr
    
    type to_name =
      | Ident of {up:to_module}
      | Bind_left of {m:module_; up:to_expr }
    and to_expr =
      | Str of {left:expr list; right:expr list; up:to_module}
    and to_module =
      | Open of { up: to_expr }
      | Bind_right of { name:string; up:to_expr }
    
      | Root
    
    • A foreign constructor Root for termination
    let up = function
      | Name_hole (name, Ident {up}) -> Module_hole(Ident name, up)
      | Name_hole (name, Bind_left {m;up}) -> Expr_hole(Bind(name,m), up)
    
      | Expr_hole (expr, Str {left; right; up}) ->
        Module_hole (Structure (List.rev_append left (expr::right)), up)
      | Module_hole (m, Open {up} ) -> Expr_hole (Open m, up)
      | Module_hole (m, Bind_right {name;up}) -> Expr_hole(Bind(name,m),up)
    
      | Module_hole (_, Root) as root -> root
    
    • Still some redundancy
    • The list of elements is hidden

    Too lax

    type step =
      | Open
      | Bind_left of module_
      | Bind_right of name
    
      | Str of {left:expr list; right:expr list}
      | Ident
    

    Too fragmented

    type to_name =
      | Ident of {up:to_module}
      | Bind_left of {m:module_; up:to_expr }
    and to_expr =
      | Str of {left:expr list; right:expr list; up:to_module}
    and to_module =
      | Open of { up: to_expr }
      | Bind_right of { name:string; up:to_expr }
    
      | Root
    

    Can we get back to a tree zipper as a typed list of unvisited subtrees ?

    Generalized algebraic data types to the rescue

    Generalized data types (GADTs) make it possible to refine the reliationship between data and types
    • Substractive view: add constraints to make some case impossible in a type definition
    • Additive view: create family of types
    type to_name =
      | Ident of {up:to_module}
      | Bind_left of {m:module_; up:to_expr }
    and to_expr =
      | Str of {left:expr list; right:expr list; up:to_module}
    and to_module =
      | Open of { up: to_expr }
      | Bind_right of { name:string; up:to_expr }
    
      | Root
    
    The three types on the left are part of the same family, they only differ by the type of the hole:
    type to_module = module_ path
    type to_name = name path
    type to_expr= expr path
    
    type to_name =
      | Ident of {up:to_module}
      | Bind_left of {m:module_; up:to_expr }
    
    and to_module =
      | Open of { up: to_expr }
      | Bind_right of { name:name; up:to_expr }
      | Root
    
    and to_expr =
      | Str of {left:expr list; right:expr list; up:to_module}
    
    
    type _ path =
     | Ident: {up:module_ path} -> name path
     | Bind_left: {m:module_; up:expr path} -> name path
    
    
     | Open: { up: expr path} -> module_ path
     | Bind_right: {name:name; up=expr path} -> module_ path
    
    
    
     | Str: {left:expr list; right:expr list; up: module_ path } -> expr path
    
    
    
     | Root: module_ path
    

    Without GADTs

    type zipper =
      | Name of name * to_name
      | Module of module_ * to_module
      | Expr of expr * to_expr
    
    
    type zipper =
      | Name of name * name path
      | Module of module_ * module_ path
      | Expr of expr * expr path
    

    With GADTs

    
     type 'a zipper = { hole:'a; path:'a path }
     (* val up: 'a zipper -> ? zipper *)
    
     type any_zipper =
     | Any: 'a zipper -> any_zipper [@@unboxed]
    
    
    let up (Any z) = match z.path with
        | Ident {up} ->
          Any { hole : module_ = Ident z.hole; path=up }
        | Bind_left {m;up} ->
          Any { hole=Bind(z.hole,m); path=up }
    
        | Str {left; right; up} ->
          Any { hole=Structure (List.rev_append left (z.hole::right)); path=up }
        | Open path -> Any {hole:expr=Open z.hole; path}
        | Bind_right {name;up} -> Any { hole=Bind(name,z.hole); path=up }
    
        | Root -> Any z
    
    type (_,_) step =
        | Ident: (name, module_) step
    
        | Bind_left: module_ -> (name, expr) step
    
        | Open: (module_, expr) step
        | Bind_right: name -> (module_, expr) step
    
    type ('hole,'start) path =
      | []: ('start,'start) path
      | (::): ('hole,'current_hole) step * ('current_hole,'start) path -> ('a,'start) path
    

    No more interned list

    Only valid path can be constructed

    
    open A
    module M = ?
    module B = struct end
    
    let valid = [
        Ident;
        Bind_right "M";
        Str {
          left=[Open (Ident "A")];
          right=[Bind("B",Structure [])]
        }
    ]
    
    
    let invalid = [ Ident; Ident ]
                    ^^^^^
    Error: This expression has type (string, module_) step
      but an expression was expected of type (module_, 'a) step
      Type string is not compatible with type module_
    
    type ('hole,'start) zipper = { hole:'hole; path:('hole,'start) path }
    type 'b any = Any: ('hole,'start) zipper -> 'start any
    
    
      let up (type b) (Any z: b any): b any  = match z.path with
        | [] -> Any z
    
        | Ident :: path ->
          Any { hole : module_ = Ident z.hole; path }
        | Bind_left m :: path ->
          Any { hole=Bind(z.hole,m); path }
    
        | Str {left; right} :: path ->
          Any { hole=Structure (List.rev_append left (z.hole::right)); path }
        | Open :: path -> Any {hole:expr=Open z.hole; path}
        | Bind_right name :: path -> Any { hole=Bind(name,z.hole); path }
    

    Types are back

    • Fully typed

    • No redundancies

    • Type definition: ~100 lines
    • Fold: ~500 lines
    • Generic printer: ~200 lines
    • Zipper are an interesting structure for decomposing computation
    • They become unwieldy for complex heterogenous trees
      • Combining them with GADTs make it easy to avoid:
        • invalid states
        • redundancy