OCaml Revisit
Based on Version 5.2, take some notes that forgot before~
Chapter 1 The core language
- If there is not enough information to disambiguate between different fields or constructors, OCaml picks the last defined type amongst all locally valid choices. link
- Imperative features
(* Arrays and for loop *)
let add_vect v1 v2 =
let len = min (Array.length v1) (Array.length v2) in
let res = Array.make len 0.0 in
for i = 0 to len - 1 do
res.(i) <- v1.(i) +. v2.(i)
done;
res
;;
(* val add_vect : float array -> float array -> float array = <fun> *)
add_vect [| 1.0; 2.0 |] [| 3.0; 4.0 |]
(* - : float array = [|4.; 6.|] *)
(* mutable record type *)
type mutable_point =
{ mutable x : float
; mutable y : float
}
let translate p dx dy =
p.x <- p.x +. dx;
p.y <- p.y +. dy
;;
(* val translate : mutable_point -> float -> float -> unit = <fun> *)
- OCaml has no built-in notion of variable – identifiers whose current value can be changed by assignment. (The let binding is not an assignment, it introduces a new identifier with a new scope.)
- The standard library provides references, which are mutable indirection cells, with operators
!
to fetch the current contents of the reference and:=
to assign the contents. Variables can then be emulated by let-binding a reference.
- The standard library provides references, which are mutable indirection cells, with operators
let insertion_sort a =
for i = 1 to Array.length a - 1 do
let val_i = a.(i) in
let j = ref i in
while !j > 0 && val_i < a.(!j - 1) do
a.(!j) <- a.(!j - 1);
j := !j - 1
done;
a.(!j) <- val_i
done
;;
(* val insertion_sort : 'a array -> unit = <fun> *)
- store a polymorphic function in a data structure, keeping its polymorphism
# type idref = { mutable id: 'a. 'a -> 'a };;
type idref = { mutable id : 'a. 'a -> 'a; }
# let r = {id = fun x -> x};;
val r : idref = {id = <fun>}
# let g s = (s.id 1, s.id true);;
val g : idref -> int * bool = <fun>
# r.id <- (fun x -> print_string "called id\n"; x);;
- : unit = ()
# g r;;
called id
called id
- : int * bool = (1, true)
type bothref = { mutable both : 'a 'b. 'a -> 'b -> 'a * 'b }
let b = { both = (fun x y -> x, y) }
- Exception
exception Empty_list
let head l =
match l with
| [] -> raise Empty_list
| hd :: _ -> hd
;;
let rec first_named_value values names =
try List.assoc (head values) names with
| Empty_list -> "no named value"
| Not_found -> first_named_value (List.tl values) names
;;
(* val first_named_value : 'a list -> ('a * string) list -> string = <fun> *)
# first_named_value [0; 10] [1, "one"; 10, "ten"];;
- : string = "ten"
(* finalization perfrom *)
let temporarily_set_reference ref newval funct =
let oldval = !ref in
try
ref := newval;
let res = funct () in
ref := oldval;
res
with
| x ->
ref := oldval;
raise x
;;
(* Use as local *)
let fixpoint f x =
let exception Done in
let x = ref x in
try while true do
let y = f !x in
if !x = y then raise Done else x := y
done
with Done -> !x;;
- val fixpoint : ('a -> 'a) -> 'a -> 'a = <fun>
- Lazy expressions, use
lazy (expr)
to delay the evaluation of some expressionexpr
.
let lazy_two =
lazy
(print_endline "lazy_two evaluation";
1 + 1)
;;
# Lazy.force lazy_two;;
lazy_two evaluation
- : int = 2
(* Lazy.force memoizes the result of the forced expression *)
# lazy_two;;
- : int lazy_t = lazy 2
(* Another way to force lazy *)
# let lazy_l = lazy ([1; 2] @ [3; 4]);;
val lazy_l : int list lazy_t = <lazy>
# let lazy l = lazy_l;;
val l : int list = [1; 2; 3; 4]
# let maybe_eval lazy_guard lazy_expr =
match lazy_guard, lazy_expr with
| lazy false, _ -> "matches if (Lazy.force lazy_guard = false); lazy_expr not forced"
| lazy true, lazy _ -> "matches if (Lazy.force lazy_guard = true); lazy_expr forced";;
val maybe_eval : bool lazy_t -> 'a lazy_t -> string = <fun>
- OCaml code can also be compiled separately and executed non-interactively using the batch compilers ocamlc and ocamlopt. The source code must be put in a file with extension
.ml
. It consists of a sequence of phrases, which will be evaluated at runtime in their order of appearance in the source file. Unlike in interactive mode, types and values are not printed automatically; the program must call printing functions explicitly to produce some output. The;;
used in the interactive examples is not required in source files created for use with OCaml compilers, but can be helpful to mark the end of a top-level expression unambiguously even when there are syntax errors.
Chapter 2 The module system
- A primary motivation for modules is to package together related definitions (such as the definitions of a data type and associated operations over that type) and enforce a consistent naming scheme for these definitions. This avoids running out of names or accidentally confusing names.
- Such a package is called a structure and is introduced by the struct…end construct, which contains an arbitrary sequence of definitions. The structure is usually given a name with the module binding.
- It is also possible to copy the components of a module inside another module by using an
include
statement. This can be particularly useful to extend existing modules. As an illustration, we could add functions that return an optional value rather than an exception when the queue is empty.
# module Fifo =
struct
type 'a queue = { front: 'a list; rear: 'a list }
let make front rear =
match front with
| [] -> { front = List.rev rear; rear = [] }
| _ -> { front; rear }
let empty = { front = []; rear = [] }
let is_empty = function { front = []; _ } -> true | _ -> false
let add x q = make q.front (x :: q.rear)
exception Empty
let top = function
| { front = []; _ } -> raise Empty
| { front = x :: _; _ } -> x
let pop = function
| { front = []; _ } -> raise Empty
| { front = _ :: f; rear = r } -> make f r
end;;
module Fifo :
sig
type 'a queue = { front : 'a list; rear : 'a list; }
val make : 'a list -> 'a list -> 'a queue
val empty : 'a queue
val is_empty : 'a queue -> bool
val add : 'a -> 'a queue -> 'a queue
exception Empty
val top : 'a queue -> 'a
val pop : 'a queue -> 'a queue
end
# Fifo.add "hello" Fifo.empty;;
- : string Fifo.queue = {Fifo.front = ["hello"]; rear = []}
Fifo.(add "hello" empty);;
let open Fifo in add "hello" empty;;
# module FifoOpt =
struct
include Fifo
let top_opt q = if is_empty q then None else Some(top q)
let pop_opt q = if is_empty q then None else Some(pop q)
end;;
module FifoOpt :
sig
type 'a queue = 'a Fifo.queue = { front : 'a list; rear : 'a list; }
val make : 'a list -> 'a list -> 'a queue
val empty : 'a queue
val is_empty : 'a queue -> bool
val add : 'a -> 'a queue -> 'a queue
exception Empty
val top : 'a queue -> 'a
val pop : 'a queue -> 'a queue
val top_opt : 'a queue -> 'a option
val pop_opt : 'a queue -> 'a queue option
end
- Signatures are interfaces for structures. A signature specifies which components of a structure are accessible from the outside, and with which type. It can be used to hide some components of a structure (e.g. local function definitions) or export some components with a restricted type.
- it is possible to include a signature to copy its components inside the current signature.
# module type FIFO =
sig
type 'a queue (* now an abstract type *)
val empty : 'a queue
val add : 'a -> 'a queue -> 'a queue
val top : 'a queue -> 'a
val pop : 'a queue -> 'a queue
exception Empty
end;;
module type FIFO =
sig
type 'a queue
val empty : 'a queue
val add : 'a -> 'a queue -> 'a queue
val top : 'a queue -> 'a
val pop : 'a queue -> 'a queue
exception Empty
end
# module AbstractQueue = (Fifo : FIFO);;
module AbstractQueue : FIFO
# AbstractQueue.make [1] [2;3] ;;
Error: Unbound value AbstractQueue.make
# AbstractQueue.add "hello" AbstractQueue.empty;;
- : string AbstractQueue.queue = <abstr>
module Fifo = (struct ... end : FIFO);;
# module type FIFO_WITH_OPT =
sig
include FIFO
val top_opt: 'a queue -> 'a option
val pop_opt: 'a queue -> 'a queue option
end;;
module type FIFO_WITH_OPT =
sig
type 'a queue
val empty : 'a queue
val add : 'a -> 'a queue -> 'a queue
val top : 'a queue -> 'a
val pop : 'a queue -> 'a queue
exception Empty
val top_opt : 'a queue -> 'a option
val pop_opt : 'a queue -> 'a queue option
end
- Functors are “functions” from modules to modules. Functors let you create parameterized modules and then provide other modules as parameter(s) to get a specific implementation.
type comparison = Less | Equal | Greater;;
# module type ORDERED_TYPE =
sig
type t
val compare: t -> t -> comparison
end;;
module type ORDERED_TYPE = sig type t val compare : t -> t -> comparison end
# module Set =
functor (Elt: ORDERED_TYPE) ->
struct
...
let rec add x s =
match s with
[] -> [x]
| hd::tl ->
match Elt.compare x hd with
Equal -> s (* x is already in s *)
| Less -> x :: s (* x is smaller than all elements of s *)
| Greater -> hd :: add x tl
...
end;;
# module OrderedString =
struct
type t = string
let compare x y = if x = y then Equal else if x < y then Less else Greater
end;;
module OrderedString :
sig type t = string val compare : 'a -> 'a -> comparison end
# module StringSet = Set(OrderedString);;
# StringSet.member "bar" (StringSet.add "foo" StringSet.empty);;
- : bool = false
# module AbstractSet2 =
(Set : functor(Elt: ORDERED_TYPE) -> (SET with type element = Elt.t));;
- In OCaml, compilation units are special cases of structures and signatures, and the relationship between the units can be explained easily in terms of the module system. A compilation unit A comprises two files:
- the implementation file A.ml, which contains a sequence of definitions, analogous to the inside of a struct…end construct;
- the interface file A.mli, which contains a sequence of specifications, analogous to the inside of a sig…end construct.
(* These two files together define a structure named A as if the following definition was entered at top-level *)
module A: sig (* contents of file A.mli *) end
= struct (* contents of file A.ml *) end;;
The files that define the compilation units can be compiled separately using the ocamlc -c
command (the -c option means “compile only, do not try to link”); this produces compiled interface files (with extension .cmi
) and compiled object code files (with extension .cmo
). When all units have been compiled, their .cmo files are linked together using the ocamlc command. For instance, the following commands compile and link a program composed of two compilation units Aux and Main:
$ ocamlc -c Aux.mli # produces aux.cmi
$ ocamlc -c Aux.ml # produces aux.cmo
$ ocamlc -c Main.mli # produces main.cmi
$ ocamlc -c Main.ml # produces main.cmo
$ ocamlc -o theprogram Aux.cmo Main.cmo
The program behaves exactly as if the following phrases were entered at top-level:
module Aux: sig (* contents of Aux.mli *) end
= struct (* contents of Aux.ml *) end;;
module Main: sig (* contents of Main.mli *) end
= struct (* contents of Main.ml *) end;;
Chapter 3 Objects in OCaml
This chapter gives an overview of the object-oriented features of OCaml.
Note that the relationship between object, class and type in OCaml is different than in mainstream object-oriented languages such as Java and C++, so you shouldn’t assume that similar keywords mean the same thing. Object-oriented features are used much less frequently in OCaml than in those languages. OCaml has alternatives that are often more appropriate, such as modules and functors. Indeed, many OCaml programs do not use objects at all.
# class point x_init =
object
val mutable x = x_init
method get_x = x
method move d = x <- x + d
end;;
class point :
int ->
object val mutable x : int method get_x : int method move : int -> unit end
# new point;;
- : int -> point = <fun>
# let p = new point 7;;
val p : point = <obj>
p#get_x;;
# class adjusted_point x_init = point ((x_init / 10) * 10);;
class adjusted_point : int -> point
- Immediate objects: The syntax is exactly the same as for class expressions, but the result is a single object rather than a class.
# let p =
object
val mutable x = 0
method get_x = x
method move d = x <- x + d
end;;
val p : < get_x : int; move : int -> unit > = <obj>
- Self reference: A method or an initializer can invoke methods on self (that is, the current object). For that, self must be explicitly bound, here to the variable s (s could be any identifier, even though we will often choose the name self.)
# class printable_point x_init =
object (s)
val mutable x = x_init
method get_x = x
method move d = x <- x + d
method print = print_int s#get_x
end;;
class printable_point :
int ->
object
val mutable x : int
method get_x : int
method move : int -> unit
method print : unit
end
- Initializers: It is also possible to evaluate an expression immediately after the object has been built. Such code is written as an anonymous hidden method called an initializer. Therefore, it can access self and the instance variables.
# class printable_point x_init =
let origin = (x_init / 10) * 10 in
object (self)
val mutable x = origin
method get_x = x
method move d = x <- x + d
method print = print_int self#get_x
initializer print_string "new point at "; self#print; print_newline ()
end;;
class printable_point :
int ->
object
val mutable x : int
method get_x : int
method move : int -> unit
method print : unit
end
# let p = new printable_point 17;;
new point at 10
val p : printable_point = <obj>
- Virtual methods: A class containing virtual methods must be flagged
virtual
, and cannot be instantiated (that is, no object of this class can be created). It still defines type abbreviations (treating virtual methods as other methods.)
# class virtual abstract_point x_init =
object (self)
method virtual get_x : int
method get_offset = self#get_x - x_init
method virtual move : int -> unit
end;;
# class point x_init =
object
inherit abstract_point x_init
val mutable x = x_init
method get_x = x
method move d = x <- x + d
end;;
(* Instance variables can also be declared as virtual, with the same effect as with methods. *)
# class virtual abstract_point2 =
object
val mutable virtual x : int
method move d = x <- x + d
end;;
class virtual abstract_point2 :
object val mutable virtual x : int method move : int -> unit end
# class point2 x_init =
object
inherit abstract_point2
val mutable x = x_init
method get_offset = x - x_init
end;;
class point2 :
int ->
object
val mutable x : int
method get_offset : int
method move : int -> unit
end
- Private methods: Private methods are methods that do not appear in object interfaces. They can only be invoked from other methods of the same object.
# class restricted_point x_init =
object (self)
val mutable x = x_init
method get_x = x
method private move d = x <- x + d
method bump = self#move 1
end;;
class restricted_point :
int ->
object
val mutable x : int
method bump : unit
method get_x : int
method private move : int -> unit
end
(* Private methods can be made public in a subclass. *)
# class point_again x =
object (self)
inherit restricted_point x
method virtual move : _
end;;
# class point_again x =
object (self : < move : _; ..> )
inherit restricted_point x
end;;
# class point_again x =
object
inherit restricted_point x as super
method move = super#move
end;;
class point_again :
int ->
object
val mutable x : int
method bump : unit
method get_x : int
method move : int -> unit
end
- Parameterized classes: At least one of the methods has a polymorphic type (here, the type of the value stored in the reference cell), thus either the class should be parametric, or the method type should be constrained to a monomorphic type.
# class oref x_init =
object
val mutable x = x_init
method get = x
method set y = x <- y
end;;
Error: Some type variables are unbound in this type:
class oref :
'a ->
object
val mutable x : 'a
method get : 'a
method set : 'a -> unit
end
The method get has type 'a where 'a is unbound
(* A monomorphic instance of the class could be defined by *)
# class oref (x_init:int) =
object
val mutable x = x_init
method get = x
method set y = x <- y
end;;
class oref :
int ->
object val mutable x : int method get : int method set : int -> unit end
(* Note that since immediate objects do not define a class type, they have no such restriction *)
- The type parameter in the declaration may actually be constrained in the body of the class definition. In the class type, the actual value of the type parameter is displayed in the constraint clause.
# class ['a] circle (c : 'a) =
object
val mutable center = c
method center = center
method set_center c = center <- c
method move = (center#move : int -> unit)
end;;
class ['a] circle :
'a ->
object
constraint 'a = < move : int -> unit; .. >
val mutable center : 'a
method center : 'a
method move : int -> unit
method set_center : 'a -> unit
end
- Polymorphic methods
(* giving an explicitly polymorphic type in the method definition *)
# class intlist (l : int list) =
object
method empty = (l = [])
method fold : 'a. ('a -> int -> 'a) -> 'a -> 'a =
fun f accu -> List.fold_left f accu l
end;;
class intlist :
int list ->
object method empty : bool method fold : ('a -> int -> 'a) -> 'a -> 'a end
(* The following idiom separates description and definition. *)
# class type ['a] iterator =
object method fold : ('b -> 'a -> 'b) -> 'b -> 'b end;;
# class intlist' l =
object (self : int #iterator)
method empty = (l = [])
method fold f accu = List.fold_left f accu l
end;;
- Functional objects: The override construct {< ... >} returns a copy of “self” (that is, the current object), possibly changing the value of some instance variables.
# class functional_point y =
object
val x = y
method get_x = x
method move d = {< x = x + d >}
method move_to x = {< x >}
end;;
class functional_point :
int ->
object ('a)
val x : int
method get_x : int
method move : int -> 'a
method move_to : int -> 'a
end
# let p = new functional_point 7;;
val p : functional_point = <obj>
# p#get_x;;
- : int = 7
# (p#move 3)#get_x;;
- : int = 10
# (p#move_to 15)#get_x;;
- : int = 15
# p#get_x;;
- : int = 7
Chapter 5 Polymorphic variants
It is a variant tag does not belong to any type in particular, the type system will just check that it is an admissible value according to its use. You need not define a type before using a variant tag. A variant type will be inferred independently for each of its uses.
- In programs, polymorphic variants work like usual ones. You just have to prefix their names with a backquote character `.
# [`On; `Off];;
- : [> `Off | `On ] list = [`On; `Off]
(* [>`Off|`On] list means that to match this list, you should at least be able to match `Off and `On, without argument *)
# `Number 1;;
- : [> `Number of int ] = `Number 1
# let f = function `On -> 1 | `Off -> 0 | `Number n -> n;;
val f : [< `Number of int | `Off | `On ] -> int = <fun>
(* [<`On|`Off|`Number of int] means that f may be applied to `Off, `On (both without argument), or `Number n where n is an integer. *)
# List.map f [`On; `Off];;
- : int list = [1; 0]
- The
>
and<
inside the variant types show that they may still be refined, either by defining more tags or by allowing less. - The above variant types were polymorphic, allowing further refinement. When writing type annotations, one will most often describe fixed variant types, that is types that cannot be refined. This is also the case for type abbreviations. Such types do not contain < or >, but just an enumeration of the tags and their associated types, just like in a normal datatype definition.
# type 'a vlist = [`Nil | `Cons of 'a * 'a vlist];;
type 'a vlist = [ `Cons of 'a * 'a vlist | `Nil ]
# let rec map f : 'a vlist -> 'b vlist = function
| `Nil -> `Nil
| `Cons(a, l) -> `Cons(f a, map f l)
;;
val map : ('a -> 'b) -> 'a vlist -> 'b vlist = <fun>
- Type-checking polymorphic variants is a subtle thing, and some expressions may result in more complex type information.
# let f = function `A -> `C | `B -> `D | x -> x;;
val f : ([> `A | `B | `C | `D ] as 'a) -> 'a = <fun>
# f `E;;
- : [> `A | `B | `C | `D | `E ] = `E
(* If we apply f to yet another tag `E, it gets added to the list. *)
# f `A;;
- : [> `A | `B | `C | `D ] = `C
- First, since this matching is open (the last case catches any tag), we obtain the type [> `A | `B] rather than [< `A | `B] in a closed matching
- Then, since x is returned as is, input and return types are identical. The notation as 'a denotes such type sharing.
# let f1 = function `A x -> x = 1 | `B -> true | `C -> false
let f2 = function `A x -> x = "a" | `B -> true ;;
val f1 : [< `A of int | `B | `C ] -> bool = <fun>
val f2 : [< `A of string | `B ] -> bool = <fun>
# let f x = f1 x && f2 x;;
val f : [< `A of string & int | `B ] -> bool = <fun>
(** Here f1 and f2 both accept the variant tags `A and `B, but the argument of `A is int for f1 and string for f2. In f’s type `C, only accepted by f1, disappears, but both argument types appear for `A as int & string. This means that if we pass the variant tag `A to f, its argument should be _both_ int and string. Since there is no such value, f cannot be applied to `A, and `B is the only accepted input. **)
- Even if a value has a fixed variant type, one can still give it a larger type through coercions. Coercions are normally written with both the source type and the destination type, but in simple cases the source type may be omitted.
# type 'a wlist = [`Nil | `Cons of 'a * 'a wlist | `Snoc of 'a wlist * 'a];;
type 'a wlist = [ `Cons of 'a * 'a wlist | `Nil | `Snoc of 'a wlist * 'a ]
# let wlist_of_vlist l = (l : 'a vlist :> 'a wlist);;
val wlist_of_vlist : 'a vlist -> 'a wlist = <fun>
# let open_vlist l = (l : 'a vlist :> [> 'a vlist]);;
val open_vlist : 'a vlist -> [> 'a vlist ] = <fun>
# fun x -> (x :> [`A|`B|`C]);;
- : [< `A | `B | `C ] -> [ `A | `B | `C ] = <fun>
# let split_cases = function
| `Nil | `Cons _ as x -> `A x
| `Snoc _ as x -> `B x
;;
val split_cases :
[< `Cons of 'a | `Nil | `Snoc of 'b ] ->
[> `A of [> `Cons of 'a | `Nil ] | `B of [> `Snoc of 'b ] ] = <fun>
- When an or-pattern composed of variant tags is wrapped inside an alias-pattern, the alias is given a type containing only the tags enumerated in the or-pattern. This allows for many useful idioms, like incremental definition of functions.
# let num x = `Num x
let eval1 eval (`Num x) = x
let rec eval x = eval1 eval x ;;
val num : 'a -> [> `Num of 'a ] = <fun>
val eval1 : 'a -> [< `Num of 'b ] -> 'b = <fun>
val eval : [< `Num of 'a ] -> 'a = <fun>
# let plus x y = `Plus(x,y)
let eval2 eval = function
| `Plus(x,y) -> eval x + eval y
| `Num _ as x -> eval1 eval x
let rec eval x = eval2 eval x ;;
val plus : 'a -> 'b -> [> `Plus of 'a * 'b ] = <fun>
val eval2 : ('a -> int) -> [< `Num of int | `Plus of 'a * 'a ] -> int = <fun>
val eval : ([< `Num of int | `Plus of 'a * 'a ] as 'a) -> int
type myvariant = [`Tag1 of int | `Tag2 of bool]
(* #myvariant equivalent to writing (`Tag1(_ : int) | `Tag2(_ : bool)) *)
# let f = function
| #myvariant -> "myvariant"
| `Tag3 -> "Tag3";;
val f : [< `Tag1 of int | `Tag2 of bool | `Tag3 ] -> string = <fun>
# let g1 = function `Tag1 _ -> "Tag1" | `Tag2 _ -> "Tag2";;
val g1 : [< `Tag1 of 'a | `Tag2 of 'b ] -> string = <fun>
# let g = function
| #myvariant as x -> g1 x
| `Tag3 -> "Tag3";;
val g : [< `Tag1 of int | `Tag2 of bool | `Tag3 ] -> string
Chapter 6 Polymorphism and its limitations
- TODO
Chapter 7 Generalized algebraic datatypes
Generalized algebraic datatypes, or GADTs, extend usual sum types(ADT) in two ways
- constraints on type parameters may change depending on the value constructor
- some type variables may be existentially quantified
Adding constraints is done by giving an explicit return type, where type parameters are instantiated:
type _ term =
| Int : int -> int term
| Add : (int -> int -> int) term
| App : ('b -> 'a) term * 'b term -> 'a term
This return type must use the same type constructor as the type being defined, and have the same number of parameters.
The constraints associated to each constructor can be recovered through pattern-matching. Namely, if the type of the scrutinee of a pattern-matching contains a locally abstract type, this type can be refined according to the constructor used. These extra constraints are only valid inside the corresponding branch of the pattern-matching. If a constructor has some existential variables, fresh locally abstract types are generated, and they must not escape the scope of this branch.
- Recursive functions
let rec eval : type a. a term -> a = function
| Int n -> n (* a = int *)
| Add -> (fun x y -> x+y) (* a = int -> int -> int *)
| App(f,x) -> (eval f) (eval x)
(* eval called at types (b->a) and b for fresh b *)
(* And use it *)
let two = eval (App (App (Add, Int 1), Int 1))
val two : int = 2
It is important to remark that the function eval is using the polymorphic syntax for locally abstract types. When defining a recursive function that manipulates a GADT, explicit polymorphic recursion should generally be used. For instance, the following definition fails with a type error:
let rec eval (type a) : a term -> a = function
| Int n -> n
| Add -> (fun x y -> x+y)
| App(f,x) -> (eval f) (eval x)
Error: This expression has type ($b -> a) term
but an expression was expected of type 'a
The type constructor $b would escape its scope
Hint: $b is an existential type bound by the constructor App.
(* happens in the branch App(f,x) when eval is called with f as an argument. In this branch, the type of f is ($App_'b -> a) term. The prefix $ in $App_'b denotes an existential type named by the compiler. Since the type of eval is 'a term -> 'a, the call eval f makes the existential type $App_'b flow to the type variable 'a and escape its scope. This triggers the above error. *)
In absence of an explicit polymorphic annotation, a monomorphic type is inferred for the recursive function. If a recursive call occurs inside the function definition at a type that involves an existential GADT type variable, this variable flows to the type of the recursive function, and thus escapes its scope.
- Type inference for GADTs is notoriously hard. This is due to the fact some types may become ambiguous when escaping from a branch.
- The Int case above, n could have either type int or a, and they are not equivalent outside of that branch.
- As a first approximation, type inference will always work if a pattern-matching is annotated with types containing no free type variables (both on the scrutinee and the return type).
- This is the case in the above example, thanks to the type annotation containing only locally abstract types.
The exhaustiveness check is aware of GADT constraints, and can automatically infer that some cases cannot happen. For instance, the following pattern matching is correctly seen as exhaustive (the Add case cannot happen).
let get_int : int term -> int = function
| Int n -> n
| App(_,_) -> 0
Chapter 9 Parallel programming
OCaml distinguishes concurrency and parallelism and provides distinct mechanisms for expressing them. Concurrency is overlapped execution of tasks (section 12.24.2) whereas parallelism is simultaneous execution of tasks. In particular, parallel tasks overlap in time but concurrent tasks may or may not overlap in time. Tasks may execute concurrently by yielding control to each other. While concurrency is a program structuring mechanism, parallelism is a mechanism to make your programs run faster. If you are interested in the concurrent programming mechanisms in OCaml, please refer to the section 12.24 on effect handlers and the chapter 34 on the threads library.
- Let us attempt to parallelise the Fibonacci function. The two recursive calls may be executed in parallel. However, naively parallelising the recursive calls by spawning domains for each one will not work as it spawns too many domains.
(* fib_par1.ml *)
let n = try int_of_string Sys.argv.(1) with _ -> 1
let rec fib n =
if n < 2 then 1 else begin
let d1 = Domain.spawn (fun _ -> fib (n - 1)) in
let d2 = Domain.spawn (fun _ -> fib (n - 2)) in
Domain.join d1 + Domain.join d2
end
let main () =
let r = fib n in
Printf.printf "fib(%d) = %d\n%!" n r
let _ = main ()
ocamlopt -o fib_par1.exe fib_par1.ml
$ ./fib_par1.exe 42
Fatal error: exception Failure("failed to allocate domain")
- OCaml has a limit of 128 domains that can be active at the same time. An attempt to spawn more domains will raise an exception. How then can we parallelise the Fibonacci function?
- The OCaml standard library provides only low-level primitives for concurrent and parallel programming, leaving high-level programming libraries to be developed and distributed outside the core compiler distribution. Domainslib is such a library for nested-parallel programming, which is epitomised by the parallelism available in the recursive Fibonacci computation. Let us use domainslib to parallelise the recursive Fibonacci program.
- Domainslib provides an async/await mechanism for spawning parallel tasks and awaiting their results. On top of this mechanism, domainslib provides parallel iterators. At its core, domainslib has an efficient implementation of work-stealing queue in order to efficiently share tasks with other domains. A parallel implementation of the Fibonacci program is given below.
(* fib_par2.ml *)
let num_domains = int_of_string Sys.argv.(1)
let n = int_of_string Sys.argv.(2)
let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2)
module T = Domainslib.Task
let rec fib_par pool n =
if n > 20 then begin
let a = T.async pool (fun _ -> fib_par pool (n-1)) in
let b = T.async pool (fun _ -> fib_par pool (n-2)) in
T.await pool a + T.await pool b
end else fib n
let main () =
let pool = T.setup_pool ~num_domains:(num_domains - 1) () in
let res = T.run pool (fun _ -> fib_par pool n) in
T.teardown_pool pool;
Printf.printf "fib(%d) = %d\n" n res
let _ = main ()
- Parallel garbage collection
-
An important aspect of the scalability of parallel OCaml programs is the scalability of the garbage collector (GC). The OCaml GC is designed to have both low latency and good parallel scalability. OCaml has a generational garbage collector with a small minor heap and a large major heap.
-
New objects (upto a certain size) are allocated in the minor heap. Each domain has its own domain-local minor heap arena into which new objects are allocated without synchronising with the other domains. When a domain exhausts its minor heap arena, it calls for a stop-the-world collection of the minor heaps. In the stop-the-world section, all the domains collect their minor heap arenas in parallel evacuating the survivors to the major heap.
-
For the major heap, each domain maintains domain-local, size-segmented pools of memory into which large objects and survivors from the minor collection are allocated. Having domain-local pools avoids synchronisation for most major heap allocations. The major heap is collected by a concurrent mark-and-sweep algorithm that involves a few short stop-the-world pauses for each major cycle.
-
Overall, the users should expect the garbage collector to scale well with increasing number of domains, with the latency remaining low. For more information on the design and evaluation of the garbage collector, please have a look at the ICFP 2020 paper on Retrofitting Parallelism onto OCaml.