Posted on July 30, 2023

How complicated can numbers really be? In OCaml, turns out, quite a bit: sometimes ‘1’ turns up as ‘2’ in the assembly or a 64 bit number suddenly takes up 192 bits of space. But is it necessarily a bad thing?

In this video I do a deeper dive into how numbers are implemented in OCaml looking both at the runtime’s source code and the generated assembly to support the findings. I also run a number of benchmarks to get a better feel for the overhead the comes from using a universal representation of values in OCaml.

The text below severaly lacks prose and for now can be considered an appendix for the video. I hope to get back to it and re-work as a proper post at some point.

Appendix

Setup

uname -ro
lscpu | grep 'Model name'
cc --version | head -n1
: 6.4.6-100.fc37.x86_64 GNU/Linux
: Model name:                      AMD Ryzen 5 3600 6-Core Processor
: cc (GCC) 12.3.1 20230508 (Red Hat 12.3.1-1)

Integers

Memory representation

let x = 42 ;;
#show x
val x : int
let x = 42 in
let xr = Obj.repr x in
Obj.is_block xr, Obj.reachable_words xr, Obj.tag xr
(false, 0, 1000)
let i64 = Int64.of_int 42 in
let i64r = Obj.repr i64 in
Obj.is_block i64r, Obj.reachable_words i64r, Obj.tag i64r
(true, 3, 255)

Peaking into assembly

let plus_one x = x + 1
opam exec --switch=4.14.1 -- ocamlopt -g -O2 -S -c plus_one.ml
cat plus_one.s | grep -v cfi | grep -v align | grep -v type | grep -v size | grep -C5 addq
 .file	1	"plus_one.ml"
 .loc	1	1	13
 .loc	1	1	17
.L100:
 .loc	1	1	17
 addq	$2, %rax
 ret
 .text
 .globl	camlPlus_one__entry
camlPlus_one__entry:
.L101:
let plus x y = x + y
opam exec --switch=4.14.1 -- ocamlopt -g -O2 -S -c plus.ml
cat plus.s | grep -v cfi | grep -v align | grep -v type | grep -v size | grep -C5 lea
 .file	1	"plus.ml"
 .loc	1	1	9
 .loc	1	1	15
.L100:
 .loc	1	1	15
 leaq	-1(%rax,%rbx), %rax
 ret
 .text
 .globl	camlPlus__entry
camlPlus__entry:
.L101:

Performance of addition

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0 in
  for i = 0 to niter do
    x := !x + 1
  done;
  exit !x
#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  long x = 0;
  for (long i = 0; i < niter; i++) {
    x = x + 1;
  }

  return x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 intadd.ml -o intadd
/bin/time ./intadd 1000000000 2>&1
echo "C:"
cc -O2 cintadd.c -o cintadd
/bin/time ./cintadd 1000000000 2>&1
OCaml:
Command exited with non-zero status 1
0.24user 0.00system 0:00.24elapsed 100%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+103minor)pagefaults 0swaps
C:
0.00user 0.00system 0:00.00elapsed 80%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+65minor)pagefaults 0swaps

Performance of multiplication

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 1 in
  for i = 0 to niter do
    x := !x * 86
  done;
  exit !x
#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  long x = 1;
  for (long i = 0; i < niter; i++) {
    x = x * 86;
  }

  return x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 intmul.ml -o intmul
/bin/time ./intmul 1000000000 2>&1
echo "C:"
cc -O2 cintmul.c -o cintmul
/bin/time ./cintmul 1000000000 2>&1
OCaml:
0.96user 0.00system 0:00.96elapsed 99%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+104minor)pagefaults 0swaps
C:
0.71user 0.00system 0:00.71elapsed 99%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+63minor)pagefaults 0swaps

Floats

Memory representation

let v = 0. in
let vr = Obj.repr v in
Obj.is_block vr, Obj.reachable_words vr, Obj.tag vr
(true, 2, 253)

Performance of direct addition

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0. in
  for i = 0 to niter do
    x := !x +. 86.
  done;
  exit (Int.of_float !x)
#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 0;
  for (long i = 0; i < niter; i++) {
    x = x + 86.;
  }

  return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatadd.ml -o floatadd
/bin/time ./floatadd 1000000000 2>&1
echo "C:"
cc -O2 cfloatadd.c -o cfloatadd
/bin/time ./cfloatadd 1000000000 2>&1
OCaml:
Command exited with non-zero status 86
0.73user 0.00system 0:00.73elapsed 99%CPU (0avgtext+0avgdata 1792maxresident)k
0inputs+0outputs (0major+105minor)pagefaults 0swaps
C:
0.72user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+64minor)pagefaults 0swaps

Performance of direct multiplication

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 1. in
  for i = 0 to niter do
    x := !x *. 86.
  done;
  exit (Int.of_float !x)
#include <stdlib.h>

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 1;
  for (long i = 0; i < niter; i++) {
    x = x * 86.;
  }

  return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatmul.ml -o floatmul
/bin/time ./floatmul 1000000000 2>&1
echo "C:"
cc -O1 cfloatmul.c -o cfloatmul
/bin/time ./cfloatmul 1000000000 2>&1
OCaml:
0.72user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+105minor)pagefaults 0swaps
C:
0.71user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+65minor)pagefaults 0swaps

Performance of indirect addition

let [@inline never] plusf x = x +. 86.

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0. in
  for i = 0 to niter do
    x := plusf !x
  done;
  exit (Int.of_float !x)
#include <stdlib.h>

__attribute__((noinline)) double plusf(double x) {
  return x + 86.;
}

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 0;
  for (long i = 0; i < niter; i++) {
    x = plusf(x);
  }

  return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floataddf.ml -o floataddf
/bin/time ./floataddf 1000000000 2>&1
echo "C:"
cc -O2 cfloataddf.c -o cfloataddf
/bin/time ./cfloataddf 1000000000 2>&1
OCaml:
Command exited with non-zero status 86
4.87user 0.00system 0:04.88elapsed 99%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+621minor)pagefaults 0swaps
C:
1.22user 0.00system 0:01.22elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+66minor)pagefaults 0swaps

Performance of indirect multiplication

let [@inline never] mulf x = x *. 86.

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 1. in
  for i = 0 to niter do
    x := mulf !x
  done;
  exit (Int.of_float !x)
#include <stdlib.h>

__attribute__((noinline)) double mulf(double x) {
  return x * 86.;
}

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  double x = 1;
  for (long i = 0; i < niter; i++) {
    x = mulf(x);
  }

  return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatmulf.ml -o floatmulf
/bin/time ./floatmulf 100000000 2>&1
echo "C:"
cc -O2 cfloatmulf.c -o cfloatmulf
/bin/time ./cfloatmulf 100000000 2>&1
OCaml:
0.48user 0.00system 0:00.48elapsed 99%CPU (0avgtext+0avgdata 3840maxresident)k
0inputs+0outputs (0major+621minor)pagefaults 0swaps
C:
0.12user 0.00system 0:00.12elapsed 100%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+61minor)pagefaults 0swaps

Integers again, Int64.t

let [@inline never] plus64 x = Int64.add x 86L

let () =
  let niter = int_of_string Sys.argv.(1) in
  let x = ref 0L in
  for i = 0 to niter do
    x := plus64 !x
  done;
  exit (Int64.to_int !x)
#include <stdlib.h>

__attribute__((noinline)) long plus(long x) {
  return x + 86;
}

int main(int argc, char* argv[]) {
  long niter = atol(argv[1]);

  long x = 0;
  for (long i = 0; i < niter; i++) {
    x = plus(x);
  }

  return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 int64add.ml -o int64add
/bin/time ./int64add 100000000 2>&1
echo "C:"
cc -O1 cint64add.c -o cint64add
/bin/time ./cint64add 100000000 2>&1
OCaml:
Command exited with non-zero status 86
0.37user 0.00system 0:00.37elapsed 99%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+619minor)pagefaults 0swaps
C:
0.12user 0.00system 0:00.12elapsed 100%CPU (0avgtext+0avgdata 896maxresident)k
0inputs+0outputs (0major+60minor)pagefaults 0swaps