How complicated can numbers really be? In OCaml, turns out, quite a bit: sometimes ‘1’ turns up as ‘2’ in the assembly or a 64 bit number suddenly takes up 192 bits of space. But is it necessarily a bad thing?

In this video I do a deeper dive into how numbers are implemented in OCaml looking both at the runtime’s source code and the generated assembly to support the findings. I also run a number of benchmarks to get a better feel for the overhead the comes from using a universal representation of values in OCaml.

The text below severaly lacks prose and for now can be considered an appendix for the video. I hope to get back to it and re-work as a proper post at some point.

Appendix

Setup

uname -ro
lscpu | grep 'Model name'
cc --version | head -n1

: 6.4.6-100.fc37.x86_64 GNU/Linux
: Model name:                      AMD Ryzen 5 3600 6-Core Processor
: cc (GCC) 12.3.1 20230508 (Red Hat 12.3.1-1)

Integers

Memory representation

let x = 42 ;;
#show x

val x : int

let x = 42 in
let xr = Obj.repr x in
Obj.is_block xr, Obj.reachable_words xr, Obj.tag xr

(false, 0, 1000)

let i64 = Int64.of_int 42 in
let i64r = Obj.repr i64 in
Obj.is_block i64r, Obj.reachable_words i64r, Obj.tag i64r

(true, 3, 255)

Peaking into assembly

let plus_one x = x + 1

opam exec --switch=4.14.1 -- ocamlopt -g -O2 -S -c plus_one.ml
cat plus_one.s | grep -v cfi | grep -v align | grep -v type | grep -v size | grep -C5 addq

 .file	1	"plus_one.ml"
 .loc	1	1	13
 .loc	1	1	17
.L100:
 .loc	1	1	17
 addq	$2, %rax
 ret
 .text
 .globl	camlPlus_one__entry
camlPlus_one__entry:
.L101:

let plus x y = x + y

opam exec --switch=4.14.1 -- ocamlopt -g -O2 -S -c plus.ml
cat plus.s | grep -v cfi | grep -v align | grep -v type | grep -v size | grep -C5 lea

 .file	1	"plus.ml"
 .loc	1	1	9
 .loc	1	1	15
.L100:
 .loc	1	1	15
 leaq	-1(%rax,%rbx), %rax
 ret
 .text
 .globl	camlPlus__entry
camlPlus__entry:
.L101:

Performance of addition

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0 in
  for i = 0 to niter do
    x := !x + 1
  done;
  exit !x

#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  long x = 0;
  for (long i = 0; i < niter; i++) {
    x = x + 1;
  }

  return x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 intadd.ml -o intadd
/bin/time ./intadd 1000000000 2>&1
echo "C:"
cc -O2 cintadd.c -o cintadd
/bin/time ./cintadd 1000000000 2>&1

OCaml:
Command exited with non-zero status 1
0.24user 0.00system 0:00.24elapsed 100%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+103minor)pagefaults 0swaps
C:
0.00user 0.00system 0:00.00elapsed 80%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+65minor)pagefaults 0swaps

Performance of multiplication

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 1 in
  for i = 0 to niter do
    x := !x * 86
  done;
  exit !x

#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  long x = 1;
  for (long i = 0; i < niter; i++) {
    x = x * 86;
  }

  return x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 intmul.ml -o intmul
/bin/time ./intmul 1000000000 2>&1
echo "C:"
cc -O2 cintmul.c -o cintmul
/bin/time ./cintmul 1000000000 2>&1

OCaml:
0.96user 0.00system 0:00.96elapsed 99%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+104minor)pagefaults 0swaps
C:
0.71user 0.00system 0:00.71elapsed 99%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+63minor)pagefaults 0swaps

Floats

Memory representation

let v = 0. in
let vr = Obj.repr v in
Obj.is_block vr, Obj.reachable_words vr, Obj.tag vr

(true, 2, 253)

Performance of direct addition

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0. in
  for i = 0 to niter do
    x := !x +. 86.
  done;
  exit (Int.of_float !x)

#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 0;
  for (long i = 0; i < niter; i++) {
    x = x + 86.;
  }

  return (int)x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatadd.ml -o floatadd
/bin/time ./floatadd 1000000000 2>&1
echo "C:"
cc -O2 cfloatadd.c -o cfloatadd
/bin/time ./cfloatadd 1000000000 2>&1

OCaml:
Command exited with non-zero status 86
0.73user 0.00system 0:00.73elapsed 99%CPU (0avgtext+0avgdata 1792maxresident)k
0inputs+0outputs (0major+105minor)pagefaults 0swaps
C:
0.72user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+64minor)pagefaults 0swaps

Performance of direct multiplication

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 1. in
  for i = 0 to niter do
    x := !x *. 86.
  done;
  exit (Int.of_float !x)

#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 1;
  for (long i = 0; i < niter; i++) {
    x = x * 86.;
  }

  return (int)x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatmul.ml -o floatmul
/bin/time ./floatmul 1000000000 2>&1
echo "C:"
cc -O1 cfloatmul.c -o cfloatmul
/bin/time ./cfloatmul 1000000000 2>&1

OCaml:
0.72user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+105minor)pagefaults 0swaps
C:
0.71user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+65minor)pagefaults 0swaps

Performance of indirect addition

let [@inline never] plusf x = x +. 86.

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0. in
  for i = 0 to niter do
    x := plusf !x
  done;
  exit (Int.of_float !x)

#include <stdlib.h>

__attribute__((noinline)) double plusf(double x) {
  return x + 86.;
}

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 0;
  for (long i = 0; i < niter; i++) {
    x = plusf(x);
  }

  return (int)x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floataddf.ml -o floataddf
/bin/time ./floataddf 1000000000 2>&1
echo "C:"
cc -O2 cfloataddf.c -o cfloataddf
/bin/time ./cfloataddf 1000000000 2>&1

OCaml:
Command exited with non-zero status 86
4.87user 0.00system 0:04.88elapsed 99%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+621minor)pagefaults 0swaps
C:
1.22user 0.00system 0:01.22elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+66minor)pagefaults 0swaps

Performance of indirect multiplication

let [@inline never] mulf x = x *. 86.

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 1. in
  for i = 0 to niter do
    x := mulf !x
  done;
  exit (Int.of_float !x)

#include <stdlib.h>

__attribute__((noinline)) double mulf(double x) {
  return x * 86.;
}

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 1;
  for (long i = 0; i < niter; i++) {
    x = mulf(x);
  }

  return (int)x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatmulf.ml -o floatmulf
/bin/time ./floatmulf 100000000 2>&1
echo "C:"
cc -O2 cfloatmulf.c -o cfloatmulf
/bin/time ./cfloatmulf 100000000 2>&1

OCaml:
0.48user 0.00system 0:00.48elapsed 99%CPU (0avgtext+0avgdata 3840maxresident)k
0inputs+0outputs (0major+621minor)pagefaults 0swaps
C:
0.12user 0.00system 0:00.12elapsed 100%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+61minor)pagefaults 0swaps

Integers again, `Int64.t`

let [@inline never] plus64 x = Int64.add x 86L

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0L in
  for i = 0 to niter do
    x := plus64 !x
  done;
  exit (Int64.to_int !x)

#include <stdlib.h>

__attribute__((noinline)) long plus(long x) {
  return x + 86;
}

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  long x = 0;
  for (long i = 0; i < niter; i++) {
    x = plus(x);
  }

  return (int)x;
}

echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 int64add.ml -o int64add
/bin/time ./int64add 100000000 2>&1
echo "C:"
cc -O1 cint64add.c -o cint64add
/bin/time ./cint64add 100000000 2>&1

OCaml:
Command exited with non-zero status 86
0.37user 0.00system 0:00.37elapsed 99%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+619minor)pagefaults 0swaps
C:
0.12user 0.00system 0:00.12elapsed 100%CPU (0avgtext+0avgdata 896maxresident)k
0inputs+0outputs (0major+60minor)pagefaults 0swaps

Appendix

Setup

Integers

Memory representation

Peaking into assembly

Performance of addition

Performance of multiplication

Floats

Memory representation

Performance of direct addition

Performance of direct multiplication

Performance of indirect addition

Performance of indirect multiplication

Integers again, Int64.t

Integers again, `Int64.t`