Leaf Functions: Realization in Assembly
What is shown here is a simple and efficient way for realizing a function call convention. However, there is a serious catch to it. It only works for so called leaf functions, i.e. functions that do not call another functions. So in particular something like recursive function calls will not work with that.
Provided Material
This is the assembly program that was developed in the video:
The line with the halt instruction
1 | halt %RET_VAL
|
was changed to
1 | halt 0
|
So that it has an exit code of 0.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | .equ CALLER0, 1
.equ CALLER1, CALLER0 + 1
.equ CALLER2, CALLER1 + 1
.equ CALLER3, CALLER2 + 1
.equ CALLER_LAST,CALLER3
.equ CALLEE0, CALLER_LAST + 1
.equ CALLEE1, CALLEE0 + 1
.equ CALLEE2, CALLEE1 + 1
.equ CALLEE3, CALLEE2 + 1
.equ CALLEE_LAST,CALLEE3
.equ FUNC_ADDR, CALLER0
.equ RET_ADDR, CALLER1
.equ RET_VAL, CALLER2
.equ PARAM0, CALLEE0
//------------------------------------------------------------------------------
// program: main
.data
msg0:
.string "n = "
msg1:
.string "n + 1 = "
.equ N, CALLER3
.text
# puts("n = ");
ldzwq msg0, %PARAM0
ldzwq puts, %FUNC_ADDR
call %FUNC_ADDR, %RET_ADDR
# %RET_VAL = get_uint64();
ldzwq get_uint64, %FUNC_ADDR
call %FUNC_ADDR, %RET_ADDR
# %N = %RET_VAL + 1
addq 1, %RET_VAL, %N
# puts("n + 1 = ");
ldzwq msg1, %PARAM0
ldzwq puts, %FUNC_ADDR
call %FUNC_ADDR, %RET_ADDR
# print_uint64(%N)
movq %N, %PARAM0
ldzwq print_uint64, %FUNC_ADDR
call %FUNC_ADDR, %RET_ADDR
putc '\n'
halt %0
//------------------------------------------------------------------------------
// function: void puts(%PARAM0)
puts:
.equ addr, PARAM0
.equ ch, CALLEE1
.text
.puts.fetch:
movzbq (%addr),%ch
subq 0, %ch, %0
je .puts.ret
putc %ch
addq 1, %addr, %addr
jmp .puts.fetch
.puts.ret:
ret %RET_ADDR
//------------------------------------------------------------------------------
// function: %RET_VAL get_uint64()
get_uint64:
.equ dest, RET_VAL
.equ ch, CALLEE0
.text
ldzwq 0, %dest
.get_uint64.read:
getc %ch
# if %ch < '0' then we are done
subq '0', %ch, %0
jb .get_uint64.ret
# if %ch > '9' then we are done
subq '9', %ch, %0
ja .get_uint64.ret
subq '0', %ch, %ch
imulq 10, %dest, %dest
addq %ch, %dest, %dest
jmp .get_uint64.read
.get_uint64.ret:
ret %RET_ADDR
//------------------------------------------------------------------------------
// function: void print_uint64(%PARAM0)
print_uint64
.data
.equ val, PARAM0
.equ digit, CALLEE1
.equ p, CALLEE2
.bss
.print_uint64.buf:
.space 20
.text
# init p with .print_uint64.buf
ldzwq .print_uint64.buf, %p
.print_uint64.get_digit:
divq 10, %val, %val
addq '0', %digit, %digit
movb %digit, (%p)
addq 1, %p, %p
subq 0, %val, %0
jnz .print_uint64.get_digit
.print_uint64.print_digit:
subq 1, %p, %p
movzbq (%p), %digit
putc %digit
subq .print_uint64.buf, %p, %0
jnz .print_uint64.print_digit
ret %RET_ADDR
|
And here the ULM Instruction Set and its isa.txt source code:
RRR (OP u 8) (X u 8) (Y u 8) (Z u 8)
J26 (OP u 8) (XYZ j 24)
U16R (OP u 8) (XY u 16) (Z u 8)
0x01 RRR
: halt %X
ulm_halt(ulm_regVal(X));
0x02 RRR
: getc %X
ulm_setReg(ulm_readChar() & 0xFF, X);
0x03 RRR
: putc %X
ulm_printChar(ulm_regVal(X) & 0xFF);
0x04 J26
# Unconditional jump.
: jmp XYZ
ulm_unconditionalRelJump(XYZ);
0x05 RRR
: subq X, %Y, %Z
ulm_sub64(X, ulm_regVal(Y), Z);
0x06 J26
: jnz XYZ
: jne XYZ
ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 0, XYZ);
0x07 J26
: jz XYZ
: je XYZ
ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 1, XYZ);
0x08 U16R
: ldzwq XY, %Z
ulm_setReg(XY, Z);
0x09 RRR
: movzbq (%X), %Z
ulm_fetch64(0, X, 0, 0, ULM_ZERO_EXT, 1, Z);
0x0A RRR
: addq X, %Y, %Z
ulm_add64(X, ulm_regVal(Y), Z);
0x0B RRR
: imulq %X, %Y, %Z
ulm_mul64(ulm_regVal(X), ulm_regVal(Y), Z);
0x0C J26
: ja XYZ
ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 0
&& ulm_statusReg[ULM_CF] == 0, XYZ);
0x0D J26
: jb XYZ
ulm_conditionalRelJump(ulm_statusReg[ULM_CF] == 1, XYZ);
0x0E RRR
: addq %X, %Y, %Z
: movq %X, %Z
ulm_add64(ulm_regVal(X), ulm_regVal(Y), Z);
0x0F RRR
: imulq X, %Y, %Z
ulm_mul64(X, ulm_regVal(Y), Z);
0x10 RRR
# Uses the 128-bit unsigned integer divider. The 64-bits stored in %Y are
# therefore zero extended with %0 to 128 bits and then used as numerator.
# The immediate value X is used as unsigned denominator.
#
# The result of the operation is the quotient and the remainder. These results
# will be stored in a register pair: The quotient is stored in %Z and the
# remainder in %{Z + 1}.
: divq X, %Y, %Z
ulm_div128(X, ulm_regVal(Y), ulm_regVal(0), Z, 0, Z + 1);
0x11 RRR
# The least significant byte in %X gets stored at address %Z.
: movb %X, (%Z)
ulm_store64(0, Z, 0, 0, 1, X);
0x12 RRR
# Fetches a quad word from address %X into register %Z
: movq (%X), %Z
ulm_fetch64(0, X, 0, 0, ULM_ZERO_EXT, 8, Z);
0x13 RRR
: putc X
ulm_printChar(X & 0xFF);
0x14 RRR
# Function call. Return address gets stored in %Y, then jumps to address %X.
: jmp %X, %Y
: call %X, %Y
: ret %X
ulm_absJump(ulm_regVal(X), Y);
# General meaning of the mnemonics
@addq
# Integer addition.
@getc
# Get character
@divq
# 64-bit unsigned integer division
@halt
# Halt program
@imulq
# 64-bit unsigned and signed integer multiplication.
@ja
# Jump if above (conditional jump).
@jb
# Jump if below (conditional jump).
@je
# Jump if equal (conditional jump).
@jmp
# Jump.
@jne
# Jump if not equal (conditional jump).
@jnz
# Jump if not zero (conditional jump).
@jz
# Jump if zero (conditional jump).
@ldzwq
# Load zero extended word to quad word register.
@putc
# Put character
@subq
# Integer subtraction.
@movb
# Move a byte from a register to memory (store instruction).
@movzbq
# Move zero extended byte to quad word register (fetch instruction).
@movq
# Move quad word
Function-Call Jump Instruction
Key for supporting functions calls on the ULM is the jmp instruction with Opcode 0x14. It has two register operands to which we refer as %FUNC_ADDR and %RET_ADDR respectively. When executed it does two things:
-
It stores the return address %RET_ADDR and then
-
it jumps to the address in %FUNC_ADDR.
So what does return address mean? It is the address of the instruction that follows the jump instruction in memory. For example, when the jump instruction is at address 0x20 then the return address is 0x24.
In general the control flow can not be visualized with a flow chart. But in simple cases we can use for examples colors to enhance the information in such a flow chart. For example if in a program a leaf-function is called twice the different colors used in the paths make clear to which return address the function returns:
Here another example where two different leaf-functions are called:
How the Function-Call Jump Instruction is Realized
Recall that the ULM implements this simple von Neumann cycle for executing a program. So when an instruction gets executed it was loaded from address %IP (instruction pointer) into the %IR (instruction register). So what the instruction does when it gets executed is
\[\left( u(\%\text{ip}) + 4 \right) \bmod 2^{64} \to u(\%\text{RET})\quad\text{and}\quad u(\%\text{CALL}) \to u(\%\text{ip})\]Returning from a function is then possible with a jmp %RET_ADDR, %0 instruction. Of course this requires that %RET_ADDR by then still (or again) contains the original return address. So when you program you should know that you can not simply use register %RET+ADDR without care. So you see that we need some rules, a protocol or call it convention for function calls.
Descriptions of such conventions usually the terms caller and callee. As long as we only deal with leaf functions these terms have a simple meaning. The program text can be divided into a main program and functions. In the main program we only have code that eventually calls a function, i.e. caller code. In the functions we only have code that gets executed when a function was called, i.e. callee code. Now think of two persons (caller and callee) writing a program together. The caller is writing the main program, the callee all the functions. The function call conventions are a contract between the caller and callee and allows to collaborate almost independently from each other. Actually the contract specifies what information one has to provide to another.
Caller
The caller has to know the address of the function, how to pass arguments and eventually how to get a result back from the function. The caller also has to know what registers the function might modify.
In the simplest form the function does not expect any arguments and does not return a result. In this case the caller just loads the address of the function into %CALL and does the (function) jump:
Also shown in this picture is the entry point of the program. The entry point is the address of the instruction that gets executed first when execution of the program starts. Of course a correct program should also have at least one exit point, i.e. some halt instruction. As a programmer you have to make sure that at least on of these exit points gets reached.
Callee
Accordingly the callee needs to know how to receive arguments, how to return and eventually how to give back a result. And of course, what registers can be used and modified for computations.
Again we first consider that the function does not receive any arguments and does not return any result. Then we basically have this (we talk about what registers the function implementation is allowed to modify below):
How the ARM Architecture Supports Function Calls
The ARM architecture provides the “branch and link” instruction BL for calling a function:
1 | BL function_label
|
This instruction stores the return address in a register called lr (for link register) and then jumps to the address referred to by the label function_label (Sound familiar?). Returning from the function is done on ARM with a move-instruction. You simple overwrite the instruction pointer (so what we call IP for instruction pointer on ARM it's called PC for program counter) with the link register.
1 2 3 4 | MOV pc, lr /* Return from subroutine.
Note: The MOV copies the right-hand-side to the
left-hand-side
*/
|
Note that on the ULM you can note use %IP explicitly in an instruction, but jmp %RET, %0 is implicitly is equivalent to that.
Conventions for Caller and Callee
Registers either belong to the caller or the callee. However, we keep it simple here only a subset of registers actually have a owner. If the subset turns out as too small we still can use more registers.
Registers %CALLER0, ..., %CALLER3 belong to the caller, and registers %CALLEE0, ..., %CALLEE3 to the callee. Using .equ directives we make sure that these identifiers are mapped to distinct registers (and we also make it easy to extend each set):
1 2 3 4 5 6 7 8 9 10 11 | .equ CALLER0, 1
.equ CALLER1, CALLER0 + 1
.equ CALLER2, CALLER1 + 1
.equ CALLER3, CALLER2 + 1
.equ CALLER_LAST,CALLER3
.equ CALLEE0, CALLER_LAST + 1
.equ CALLEE1, CALLEE0 + 1
.equ CALLEE2, CALLEE1 + 1
.equ CALLEE3, CALLEE2 + 1
.equ CALLEE_LAST,CALLEE3
|
Return Address
The callee needs to know where the caller stored the return address. For that we specify with
1 | .equ RET_ADDR, CALLER1
|
that it will be in %RET_ADDR.
Passing arguments
If the callee expects some parameters then the caller needs to know where the callee expects these parameters. We can easily extend this list
1 2 | .equ PARAM0, CALLEE0
.equ PARAM1, PARAM0 + 1
|
to specify that a first parameter is always expected in %PARAM0, a second always in %PARAM1, etc.
Returning results
If a callee returns a value it needs to know where the caller wants to have the result. With
1 | .equ RET_VAL, CALLER2
|
we specify that the callee should store its result in %RET_VAL.