Computing Fibonacci, but using registers, and assembly

In this post we’ll go a bit deeper into computation methods using variables in different volatile and non-volatile memory storages. As a foreword, while I’ve had this experimentation idea for some time now the fact that there is only mild academic interest in the whole experimentation setup and possible results I’ve skipped the whole effort of actually take an iniative.

Still I’ve been curious on what the results of this experimentation would be. Thanks to AI tools I could finally look into this niche topic.

Introduction

A common approach to computing Fibonacci sequence is to use stack recursion. While recursion is a powerful method it repeatedly allocates new stack frames. Using this technique uncautiously can result in a system stack overflow. This is actually a very old vunerability in linux, called fork bomb, that has been decisively addressed. In UNIX systems there is a stack limit policy for individual processes. Because of this the infamous Bash fork bomb REF is terminated by the kernel before crashing the whole system.

The Fibonacci sequence is computed by adding the two consequtive members together to produce the next member in the sequence. In x86 architecture CPU the bus width is 64 bits. In the general register we can store a uint64_t number an integer up to size $1.84×10^{19}$. This means that $F(93) = 1.2200160415122×10^{19}$ still fits in the register but $F(94) = 1.9740274219868×10^{19}$ no longer fits the variable. Larger values uint128_t and uint256_t are rightly stored in multiple registers.

Experimentation setup

For our experiment we compute the 93rd Fibonacci number using uint64_t variables using three different methods. First we use a textbook reference Fibonacci recursion written in C. Then we write the same algorithm but store the variables in registers using register keyword. Finally we write the algorithm using assembly. We won’t stop here. Let’s also compare the algorithms when they return all the Fibonacci sequence member up to member $n$ and then only member $n$. As the 93 member won’t fit to general register we’ll need to store them in RAM.

These three algorithms (tehcnically the first two) are compiled using two different compiler options -O0 (zero optimization) and -O2 (use optimization).

Let’s look at the algorithms. The source code is in Appendix A. The A1 implements a simple for loop. The A2 is identical but all the variables have the register keyword prepended. The A3 is a bit more lenghty.

Results comparison

TBC

Conclusions

TBC

Appendix A: Algorithms

1#: Textbook Fibonacci

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

uint64_t fib_nonly(int n) {
    if (n <= 1) {
        return n;
    }
    
    uint64_t a = 0;
    uint64_t b = 1;
    uint64_t c;
    int i;
    
    for (i = 2; i <= n; i++) {
        c = a + b;
        a = b;
        b = c;
    }
    
    return b;
}

uint64_t* fib_reg_nonly(int n) {
    uint64_t* fib_array = (uint64_t*)malloc((n + 1) * sizeof(uint64_t));
    
    if (n >= 0) fib_array[0] = 0;
    if (n >= 1) fib_array[1] = 1;
    
    int i;
    for (i = 2; i <= n; i++) {
        fib_array[i] = fib_array[i - 1] + fib_array[i - 2];
    }
    
    return fib_array;
}

2#: Register Fibonacci

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

uint64_t fib_register_compute_only(int n) {
    if (n <= 1) {
        return n;
    }
    
    register uint64_t a = 0;
    register uint64_t b = 1;
    register uint64_t c;
    register int i;
    
    for (i = 2; i <= n; i++) {
        c = a + b;
        a = b;
        b = c;
    }
    
    return b;
}

uint64_t* fib_reg_alln(int n) {
    uint64_t* fib_array = (uint64_t*)malloc((n + 1) * sizeof(uint64_t));
    
    if (n >= 0) fib_array[0] = 0;
    if (n >= 1) fib_array[1] = 1;
    
    register int i;
    register uint64_t prev1, prev2, curr;
    
    for (i = 2; i <= n; i++) {
        prev1 = fib_array[i - 1];
        prev2 = fib_array[i - 2];
        curr = prev1 + prev2;
        fib_array[i] = curr;
    }
    
    return fib_array;
}

3#: Assembly Fibonacci

    .text
    .globl fib_asm_compute_only
    .globl fib_asm_compute_and_store

# ============================================================================
# fib_asm_compute_only
# Compute nth Fibonacci number using only registers
#
# Parameters:
#   edi = n (int, first argument in x86-64 calling convention)
# Returns:
#   rax = nth Fibonacci number
#
# Registers used:
#   rax = a (previous Fibonacci number)
#   rbx = b (current Fibonacci number)
#   rcx = loop counter (i)
#   rdx = temporary for addition
#   r8d = n (saved, 32-bit to avoid sign issues)
# ============================================================================
    .p2align 4
fib_asm_compute_only:
    # Handle base cases n <= 1
    cmp     $1, %edi
    jg      .L_compute_loop_init
    movsx   %edi, %rax              # return n if n <= 1
    ret

    .p2align 4
.L_compute_loop_init:
    xor     %eax, %eax              # a = 0
    mov     $1, %edx                # b = 1 (use edx instead of rbx)
    mov     $2, %ecx                # i = 2

    .p2align 4
.L_compute_loop:
    cmp     %edi, %ecx              # compare i with n
    jg      .L_compute_done         # if i > n, exit loop
    
    # Compute next Fibonacci: c = a + b, a = b, b = c
    lea     (%rax, %rdx), %rsi      # rsi = a + b (use lea for addition)
    mov     %rdx, %rax              # a = b
    mov     %rsi, %rdx              # b = temp
    
    inc     %ecx                    # i++
    jmp     .L_compute_loop         # loop back

    .p2align 4
.L_compute_done:
    mov     %rdx, %rax              # return value = b
    ret


# ============================================================================
# fib_asm_compute_and_store
# Compute and store all Fibonacci numbers from 0 to n
#
# Parameters:
#   edi = n (int, first argument)
# Returns:
#   rax = pointer to allocated array (or NULL on error)
#
# Registers used:
#   rbx = saved n (callee-saved)
#   r12 = array pointer (callee-saved)
#   rcx = loop counter
#   rax, rdx = for computation
# ============================================================================
fib_asm_compute_and_store:
    push    %rbx                    # save callee-saved registers
    push    %r12
    
    movsx   %edi, %rbx              # save n in rbx (sign-extend to 64-bit)
    
    # Allocate memory: (n+1) * 8 bytes
    lea     1(%rbx), %rdi           # rdi = n + 1
    shl     $3, %rdi                # multiply by 8 (size of uint64_t)
    call    malloc                  # call malloc
    
    test    %rax, %rax              # check if malloc returned NULL
    jz      .L_store_error          # if NULL, return error
    
    mov     %rax, %r12              # save array pointer in r12
    
    # Initialize base cases
    movq    $0, (%r12)              # fib[0] = 0
    
    cmp     $0, %rbx                # if n == 0
    je      .L_store_done           # we're done
    
    movq    $1, 8(%r12)             # fib[1] = 1
    
    cmp     $1, %rbx                # if n == 1
    je      .L_store_done           # we're done
    
    # Loop to compute remaining values
    mov     $2, %rcx                # i = 2

.L_store_loop:
    cmp     %rbx, %rcx              # compare i with n
    jg      .L_store_done           # if i > n, exit loop
    
    # Load fib[i-1] and fib[i-2]
    mov     %rcx, %rax              # rax = i
    dec     %rax                    # rax = i - 1
    mov     (%r12, %rax, 8), %rdx   # rdx = fib[i-1]
    dec     %rax                    # rax = i - 2
    mov     (%r12, %rax, 8), %rax   # rax = fib[i-2]
    
    # Compute fib[i] = fib[i-1] + fib[i-2]
    add     %rdx, %rax              # rax = fib[i-1] + fib[i-2]
    
    # Store fib[i]
    mov     %rax, (%r12, %rcx, 8)   # fib[i] = rax
    
    inc     %rcx                    # i++
    jmp     .L_store_loop           # loop back

.L_store_done:
    mov     %r12, %rax              # return array pointer
    pop     %r12                    # restore callee-saved registers
    pop     %rbx
    ret

.L_store_error:
    xor     %rax, %rax              # return NULL
    pop     %r12                    # restore callee-saved registers
    pop     %rbx
    ret


    .section .note.GNU-stack,"",@progbits