Computing Fibonacci, but using registers, and assembly
In this post we’ll go a bit deeper into computation methods using variables in different volatile and non-volatile memory storages. As a foreword, while I’ve had this experimentation idea for some time now the fact that there is only mild academic interest in the whole experimentation setup and possible results I’ve skipped the whole effort of actually take an iniative.
Still I’ve been curious on what the results of this experimentation would be. Thanks to AI tools I could finally look into this niche topic.
Introduction
A common approach to computing Fibonacci sequence is to use stack recursion. While recursion is a powerful method it repeatedly allocates new stack frames. Using this technique uncautiously can result in a system stack overflow. This is actually a very old vunerability in linux, called fork bomb, that has been decisively addressed. In UNIX systems there is a stack limit policy for individual processes. Because of this the infamous Bash fork bomb REF is terminated by the kernel before crashing the whole system.
The Fibonacci sequence is computed by adding the two consequtive members together to produce the next member in the sequence. In x86 architecture CPU the bus width is 64 bits. In the general register we can store a uint64_t number an integer up to size $1.84×10^{19}$. This means that $F(93) = 1.2200160415122×10^{19}$ still fits in the register but $F(94) = 1.9740274219868×10^{19}$ no longer fits the variable. Larger values uint128_t and uint256_t are rightly stored in multiple registers.
Experimentation setup
For our experiment we compute the 93rd Fibonacci number using uint64_t variables using three different methods. First we use a textbook reference Fibonacci recursion written in C. Then we write the same algorithm but store the variables in registers using register keyword. Finally we write the algorithm using assembly. We won’t stop here. Let’s also compare the algorithms when they return all the Fibonacci sequence member up to member $n$ and then only member $n$. As the 93 member won’t fit to general register we’ll need to store them in RAM.
These three algorithms (tehcnically the first two) are compiled using two different compiler options -O0 (zero optimization) and -O2 (use optimization).
Let’s look at the algorithms. The source code is in Appendix A. The A1 implements a simple for loop. The A2 is identical but all the variables have the register keyword prepended. The A3 is a bit more lenghty.
Results comparison
TBC
Conclusions
TBC
Appendix A: Algorithms
1#: Textbook Fibonacci
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
uint64_t fib_nonly(int n) {
if (n <= 1) {
return n;
}
uint64_t a = 0;
uint64_t b = 1;
uint64_t c;
int i;
for (i = 2; i <= n; i++) {
c = a + b;
a = b;
b = c;
}
return b;
}
uint64_t* fib_reg_nonly(int n) {
uint64_t* fib_array = (uint64_t*)malloc((n + 1) * sizeof(uint64_t));
if (n >= 0) fib_array[0] = 0;
if (n >= 1) fib_array[1] = 1;
int i;
for (i = 2; i <= n; i++) {
fib_array[i] = fib_array[i - 1] + fib_array[i - 2];
}
return fib_array;
}
2#: Register Fibonacci
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
uint64_t fib_register_compute_only(int n) {
if (n <= 1) {
return n;
}
register uint64_t a = 0;
register uint64_t b = 1;
register uint64_t c;
register int i;
for (i = 2; i <= n; i++) {
c = a + b;
a = b;
b = c;
}
return b;
}
uint64_t* fib_reg_alln(int n) {
uint64_t* fib_array = (uint64_t*)malloc((n + 1) * sizeof(uint64_t));
if (n >= 0) fib_array[0] = 0;
if (n >= 1) fib_array[1] = 1;
register int i;
register uint64_t prev1, prev2, curr;
for (i = 2; i <= n; i++) {
prev1 = fib_array[i - 1];
prev2 = fib_array[i - 2];
curr = prev1 + prev2;
fib_array[i] = curr;
}
return fib_array;
}
3#: Assembly Fibonacci
.text
.globl fib_asm_compute_only
.globl fib_asm_compute_and_store
# ============================================================================
# fib_asm_compute_only
# Compute nth Fibonacci number using only registers
#
# Parameters:
# edi = n (int, first argument in x86-64 calling convention)
# Returns:
# rax = nth Fibonacci number
#
# Registers used:
# rax = a (previous Fibonacci number)
# rbx = b (current Fibonacci number)
# rcx = loop counter (i)
# rdx = temporary for addition
# r8d = n (saved, 32-bit to avoid sign issues)
# ============================================================================
.p2align 4
fib_asm_compute_only:
# Handle base cases n <= 1
cmp $1, %edi
jg .L_compute_loop_init
movsx %edi, %rax # return n if n <= 1
ret
.p2align 4
.L_compute_loop_init:
xor %eax, %eax # a = 0
mov $1, %edx # b = 1 (use edx instead of rbx)
mov $2, %ecx # i = 2
.p2align 4
.L_compute_loop:
cmp %edi, %ecx # compare i with n
jg .L_compute_done # if i > n, exit loop
# Compute next Fibonacci: c = a + b, a = b, b = c
lea (%rax, %rdx), %rsi # rsi = a + b (use lea for addition)
mov %rdx, %rax # a = b
mov %rsi, %rdx # b = temp
inc %ecx # i++
jmp .L_compute_loop # loop back
.p2align 4
.L_compute_done:
mov %rdx, %rax # return value = b
ret
# ============================================================================
# fib_asm_compute_and_store
# Compute and store all Fibonacci numbers from 0 to n
#
# Parameters:
# edi = n (int, first argument)
# Returns:
# rax = pointer to allocated array (or NULL on error)
#
# Registers used:
# rbx = saved n (callee-saved)
# r12 = array pointer (callee-saved)
# rcx = loop counter
# rax, rdx = for computation
# ============================================================================
fib_asm_compute_and_store:
push %rbx # save callee-saved registers
push %r12
movsx %edi, %rbx # save n in rbx (sign-extend to 64-bit)
# Allocate memory: (n+1) * 8 bytes
lea 1(%rbx), %rdi # rdi = n + 1
shl $3, %rdi # multiply by 8 (size of uint64_t)
call malloc # call malloc
test %rax, %rax # check if malloc returned NULL
jz .L_store_error # if NULL, return error
mov %rax, %r12 # save array pointer in r12
# Initialize base cases
movq $0, (%r12) # fib[0] = 0
cmp $0, %rbx # if n == 0
je .L_store_done # we're done
movq $1, 8(%r12) # fib[1] = 1
cmp $1, %rbx # if n == 1
je .L_store_done # we're done
# Loop to compute remaining values
mov $2, %rcx # i = 2
.L_store_loop:
cmp %rbx, %rcx # compare i with n
jg .L_store_done # if i > n, exit loop
# Load fib[i-1] and fib[i-2]
mov %rcx, %rax # rax = i
dec %rax # rax = i - 1
mov (%r12, %rax, 8), %rdx # rdx = fib[i-1]
dec %rax # rax = i - 2
mov (%r12, %rax, 8), %rax # rax = fib[i-2]
# Compute fib[i] = fib[i-1] + fib[i-2]
add %rdx, %rax # rax = fib[i-1] + fib[i-2]
# Store fib[i]
mov %rax, (%r12, %rcx, 8) # fib[i] = rax
inc %rcx # i++
jmp .L_store_loop # loop back
.L_store_done:
mov %r12, %rax # return array pointer
pop %r12 # restore callee-saved registers
pop %rbx
ret
.L_store_error:
xor %rax, %rax # return NULL
pop %r12 # restore callee-saved registers
pop %rbx
ret
.section .note.GNU-stack,"",@progbits