How complicated can numbers really be? In OCaml, turns out, quite a bit: sometimes ‘1’ turns up as ‘2’ in the assembly or a 64 bit number suddenly takes up 192 bits of space. But is it necessarily a bad thing?
In this video I do a deeper dive into how numbers are implemented in OCaml looking both at the runtime’s source code and the generated assembly to support the findings. I also run a number of benchmarks to get a better feel for the overhead the comes from using a universal representation of values in OCaml.
The text below severaly lacks prose and for now can be considered an appendix for the video. I hope to get back to it and re-work as a proper post at some point.
Appendix
Setup
uname -ro
lscpu | grep 'Model name'
cc --version | head -n1
: 6.4.6-100.fc37.x86_64 GNU/Linux
: Model name: AMD Ryzen 5 3600 6-Core Processor
: cc (GCC) 12.3.1 20230508 (Red Hat 12.3.1-1)
Integers
Memory representation
let x = 42 ;;
#show x
val x : int
let x = 42 in
let xr = Obj.repr x in
Obj.is_block xr, Obj.reachable_words xr, Obj.tag xr
(false, 0, 1000)
let i64 = Int64.of_int 42 in
let i64r = Obj.repr i64 in
Obj.is_block i64r, Obj.reachable_words i64r, Obj.tag i64r
(true, 3, 255)
Peaking into assembly
let plus_one x = x + 1
opam exec --switch=4.14.1 -- ocamlopt -g -O2 -S -c plus_one.ml
cat plus_one.s | grep -v cfi | grep -v align | grep -v type | grep -v size | grep -C5 addq
.file 1 "plus_one.ml"
.loc 1 1 13
.loc 1 1 17
.L100:
.loc 1 1 17
addq $2, %rax
ret
.text
.globl camlPlus_one__entry
camlPlus_one__entry:
.L101:
let plus x y = x + y
opam exec --switch=4.14.1 -- ocamlopt -g -O2 -S -c plus.ml
cat plus.s | grep -v cfi | grep -v align | grep -v type | grep -v size | grep -C5 lea
.file 1 "plus.ml"
.loc 1 1 9
.loc 1 1 15
.L100:
.loc 1 1 15
leaq -1(%rax,%rbx), %rax
ret
.text
.globl camlPlus__entry
camlPlus__entry:
.L101:
Performance of addition
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 0 in
for i = 0 to niter do
1
x := !x + done;
exit !x
#include <stdlib.h>
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
long x = 0;
for (long i = 0; i < niter; i++) {
= x + 1;
x }
return x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 intadd.ml -o intadd
/bin/time ./intadd 1000000000 2>&1
echo "C:"
cc -O2 cintadd.c -o cintadd
/bin/time ./cintadd 1000000000 2>&1
OCaml:
Command exited with non-zero status 1
0.24user 0.00system 0:00.24elapsed 100%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+103minor)pagefaults 0swaps
C:
0.00user 0.00system 0:00.00elapsed 80%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+65minor)pagefaults 0swaps
Performance of multiplication
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 1 in
for i = 0 to niter do
86
x := !x * done;
exit !x
#include <stdlib.h>
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
long x = 1;
for (long i = 0; i < niter; i++) {
= x * 86;
x }
return x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 intmul.ml -o intmul
/bin/time ./intmul 1000000000 2>&1
echo "C:"
cc -O2 cintmul.c -o cintmul
/bin/time ./cintmul 1000000000 2>&1
OCaml:
0.96user 0.00system 0:00.96elapsed 99%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+104minor)pagefaults 0swaps
C:
0.71user 0.00system 0:00.71elapsed 99%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+63minor)pagefaults 0swaps
Floats
Memory representation
let v = 0. in
let vr = Obj.repr v in
Obj.is_block vr, Obj.reachable_words vr, Obj.tag vr
(true, 2, 253)
Performance of direct addition
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 0. in
for i = 0 to niter do
86.
x := !x +. done;
exit (Int.of_float !x)
#include <stdlib.h>
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
double x = 0;
for (long i = 0; i < niter; i++) {
= x + 86.;
x }
return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatadd.ml -o floatadd
/bin/time ./floatadd 1000000000 2>&1
echo "C:"
cc -O2 cfloatadd.c -o cfloatadd
/bin/time ./cfloatadd 1000000000 2>&1
OCaml:
Command exited with non-zero status 86
0.73user 0.00system 0:00.73elapsed 99%CPU (0avgtext+0avgdata 1792maxresident)k
0inputs+0outputs (0major+105minor)pagefaults 0swaps
C:
0.72user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+64minor)pagefaults 0swaps
Performance of direct multiplication
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 1. in
for i = 0 to niter do
86.
x := !x *. done;
exit (Int.of_float !x)
#include <stdlib.h>
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
double x = 1;
for (long i = 0; i < niter; i++) {
= x * 86.;
x }
return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatmul.ml -o floatmul
/bin/time ./floatmul 1000000000 2>&1
echo "C:"
cc -O1 cfloatmul.c -o cfloatmul
/bin/time ./cfloatmul 1000000000 2>&1
OCaml:
0.72user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1664maxresident)k
0inputs+0outputs (0major+105minor)pagefaults 0swaps
C:
0.71user 0.00system 0:00.72elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+65minor)pagefaults 0swaps
Performance of indirect addition
let [@inline never] plusf x = x +. 86.
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 0. in
for i = 0 to niter do
x := plusf !xdone;
exit (Int.of_float !x)
#include <stdlib.h>
((noinline)) double plusf(double x) {
__attribute__return x + 86.;
}
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
double x = 0;
for (long i = 0; i < niter; i++) {
= plusf(x);
x }
return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floataddf.ml -o floataddf
/bin/time ./floataddf 1000000000 2>&1
echo "C:"
cc -O2 cfloataddf.c -o cfloataddf
/bin/time ./cfloataddf 1000000000 2>&1
OCaml:
Command exited with non-zero status 86
4.87user 0.00system 0:04.88elapsed 99%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+621minor)pagefaults 0swaps
C:
1.22user 0.00system 0:01.22elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
0inputs+0outputs (0major+66minor)pagefaults 0swaps
Performance of indirect multiplication
let [@inline never] mulf x = x *. 86.
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 1. in
for i = 0 to niter do
x := mulf !xdone;
exit (Int.of_float !x)
#include <stdlib.h>
((noinline)) double mulf(double x) {
__attribute__return x * 86.;
}
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
double x = 1;
for (long i = 0; i < niter; i++) {
= mulf(x);
x }
return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 floatmulf.ml -o floatmulf
/bin/time ./floatmulf 100000000 2>&1
echo "C:"
cc -O2 cfloatmulf.c -o cfloatmulf
/bin/time ./cfloatmulf 100000000 2>&1
OCaml:
0.48user 0.00system 0:00.48elapsed 99%CPU (0avgtext+0avgdata 3840maxresident)k
0inputs+0outputs (0major+621minor)pagefaults 0swaps
C:
0.12user 0.00system 0:00.12elapsed 100%CPU (0avgtext+0avgdata 1024maxresident)k
0inputs+0outputs (0major+61minor)pagefaults 0swaps
Integers again, Int64.t
let [@inline never] plus64 x = Int64.add x 86L
let () =
let niter = int_of_string Sys.argv.(1) in
let x = ref 0L in
for i = 0 to niter do
x := plus64 !xdone;
exit (Int64.to_int !x)
#include <stdlib.h>
((noinline)) long plus(long x) {
__attribute__return x + 86;
}
int main(int argc, char* argv[]) {
long niter = atol(argv[1]);
long x = 0;
for (long i = 0; i < niter; i++) {
= plus(x);
x }
return (int)x;
}
echo "OCaml:"
opam exec --switch=4.14.1 -- ocamlopt -O2 int64add.ml -o int64add
/bin/time ./int64add 100000000 2>&1
echo "C:"
cc -O1 cint64add.c -o cint64add
/bin/time ./cint64add 100000000 2>&1
OCaml:
Command exited with non-zero status 86
0.37user 0.00system 0:00.37elapsed 99%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+619minor)pagefaults 0swaps
C:
0.12user 0.00system 0:00.12elapsed 100%CPU (0avgtext+0avgdata 896maxresident)k
0inputs+0outputs (0major+60minor)pagefaults 0swaps