Parameters, Return Value and Local Variables

Quiz 14: Computing the greatest common divisor (Again)

Rewrite the program gcd.s from quiz 13 such that all functions conform to the new conventions for functions. Functions that do not return a value simply do not use the space reserved on the stack for the return value.

All labels are in general 64-bit literals. If you require a 64-bit literal as an absolute address fetch it from the literal pool.

Your program should have an exit code of 0.

Submit your program with

1
submit hpc quiz14 isa.txt gcd.s

Provided Material

Here an ULM Instruction Set and its isa.txt source code that contains all the instructions shown in the video (and in addition a divq variant where all operands are registers):

RRR     (OP u 8) (X u 8) (Y u 8) (Z u 8)
RSR     (OP u 8) (X u 8) (Y s 8) (Z u 8)
J26     (OP u 8) (XYZ j 24)
U16R    (OP u 8) (XY u 16) (Z u 8)
J18R    (OP u 8) (XY j 16) (Z u 8)


0x01    RRR
:   halt    %X
    ulm_halt(ulm_regVal(X));

0x02    RRR
:   getc    %X
    ulm_setReg(ulm_readChar() & 0xFF, X);

0x03    RRR
:   putc    %X
    ulm_printChar(ulm_regVal(X) & 0xFF);

0x04    J26
:   jmp XYZ
    ulm_unconditionalRelJump(XYZ);

0x05    RRR
:   subq    X, %Y, %Z
    ulm_sub64(X, ulm_regVal(Y), Z);

0x06    J26
:   jnz XYZ
:   jne XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 0, XYZ);

0x07    J26
:   jz  XYZ
:   je  XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 1, XYZ);

0x08    U16R
:   ldzwq   XY, %Z
    ulm_setReg(XY, Z);

0x09    RSR
:   movzbq Y(%X), %Z
:   movzbq (%X), %Z
    ulm_fetch64(Y, X, 0, 0, ULM_ZERO_EXT, 1, Z);

0x0A    RRR
:   addq    X, %Y, %Z
    ulm_add64(X, ulm_regVal(Y), Z);

0x0B    RRR
:   imulq   %X, %Y, %Z
    ulm_mul64(ulm_regVal(X), ulm_regVal(Y), Z);

0x0C    J26
:   ja      XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 0
                        && ulm_statusReg[ULM_CF] == 0, XYZ);

0x0D    J26
:   jb      XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_CF] == 1, XYZ);

0x0E    RRR
:   addq    %X, %Y, %Z
:   movq    %X, %Z
    ulm_add64(ulm_regVal(X), ulm_regVal(Y), Z);

0x0F    RRR
:   imulq   X, %Y, %Z
    ulm_mul64(X, ulm_regVal(Y), Z);

0x10    RRR
#   Uses the 128-bit unsigned integer divider. The 64-bits stored in %Y are
#   therefore zero extended with %0 to 128 bits and then used as numerator.
#   The immediate value X is used as unsigned denominator.
#
#   The result of the operation is the quotient and the remainder. These results
#   will be stored in a register pair: The quotient is stored in %Z and the
#   remainder in %{Z + 1}.
:   divq    X, %Y, %Z
    ulm_div128(X, ulm_regVal(Y), ulm_regVal(0), Z, 0, Z + 1);

0x11    RSR
#   The least significant byte in %X gets stored at address %Z.
:   movb    %X, Y(%Z)
:   movb    %X, (%Z)
    ulm_store64(Y, Z, 0, 0, 1, X);

0x12    RSR
#   Fetches a quad word from address %X into register %Z
:   movq    Y(%X), %Z
:   movq    (%X), %Z
    ulm_fetch64(Y, X, 0, 0, ULM_ZERO_EXT, 8, Z);

0x13    RRR
:   putc    X
    ulm_printChar(X & 0xFF);

#   void ulm_absJump(uint64_t destAddr, ulm_Reg ret);
0x14    RRR
:   jmp     %X,     %Y
:   call    %X,     %Y
:   ret     %X
    ulm_absJump(ulm_regVal(X), Y);

0x15    U16R
:   shldwq  XY, %Z
    ulm_setReg(ulm_regVal(Z) << 16 | XY, Z);

0x16    J18R
:   ldpa    XY, %Z
    ulm_setReg(ulm_ipVal() + XY, Z);

0x17    RRR
:   ldfp    Y(%X), %Z
:   ldfp    (%X), %Z
    ulm_fetch64(Y * 8, X, 0, 0, ULM_ZERO_EXT, 8, Z);

0x18    RRR
:   subq    %X, %Y, %Z
    ulm_sub64(ulm_regVal(X), ulm_regVal(Y), Z);

0x19    RSR
:   movq    %X, Y(%Z)
:   movq    %X, (%Z)
    ulm_store64(Y, Z, 0, 0, 8, X);

0x1A    RRR
#   Uses the 128-bit unsigned integer divider. The 64-bits stored in %Y are
#   therefore zero extended with %0 to 128 bits and then used as numerator.
#   The register value %X is used as 64-bit unsigned denominator.
#
#   The result of the operation is the quotient and the remainder. These results
#   will be stored in a register pair: The quotient is stored in %Z and the
#   remainder in %{Z + 1}.
:   divq    %X, %Y, %Z
    ulm_div128(ulm_regVal(X), ulm_regVal(Y), ulm_regVal(0), Z, 0, Z + 1);


# General meaning of the mnemonics

@addq
#   Integer addition.
@getc
#   Get character
@divq
#   64-bit unsigned integer division
@halt
#   Halt program
@imulq
#   64-bit unsigned and signed integer multiplication.
@ja
#   Jump if above (conditional jump).
@jb
#   Jump if below (conditional jump).
@je
#   Jump if equal (conditional jump).
@jmp
#   Jump (unconditional jump).
@jne
#   Jump if not equal (conditional jump).
@jnz
#   Jump if not zero (conditional jump).
@jz
#   Jump if zero (conditional jump).
@ldzwq
#   Load zero extended word to quad word register.
@putc
#   Put character
@subq
#   Integer subtraction.
@movb
#   Move a byte from a register to memory (store instruction).
@movzbq
#   Move zero extended byte to quad word register (fetch instruction).
@movq
#   Move quad word
@shldwq
#   Shift (destination register) and load word
@ldpa
#   Load Pool Address
@ldfp
#   Load from Pool

The program func2.s shown in the video:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
    # calling conventions
    .equ    FP,         1
    .equ    SP,         2
    .equ    RET_ADDR,   3

    .equ    ret,        0
    .equ    fp,         ret + 8
    .equ    rval,       fp + 8
    .equ    fparam0,    rval + 8
    .equ    fparam1,    fparam0 + 8

    .equ    local0,     -8
    .equ    local1,     local0 - 8

    # for the demo
    .equ    FUNC_ADDR,  4


    /*
     * function: _start
     */
_start:
    ldzwq   0,              %SP

    # call: int main(void)
    subq    8 * (3 + 0),    %SP,        %SP
    ldzwq   main,           %FUNC_ADDR
    call    %FUNC_ADDR,     %RET_ADDR
    movq    rval(%SP),      %4                  # return value in %4
    addq    8 * (3 + 0),    %SP,        %SP


    halt    %4                                  # halt with return value


    /*
     * function: int main(void)
     */
main:
    // prologue
    movq    %RET_ADDR,      ret(%SP)
    movq    %FP,            fp(%SP)
    movq    %SP,            %FP
    subq    8 * 0,          %SP,            %SP

    .equ    n,              local0

    // body

    # n = foo(12, 13);
    subq    8 * (3 + 2),    %SP,        %SP
    ldzwq   12,             %4
    movq    %4,             fparam0(%SP)        # fparam0 = 12
    ldzwq   13,             %4
    movq    %4,             fparam1(%SP)        # fparam1 = 13
    ldzwq   foo,            %FUNC_ADDR
    call    %FUNC_ADDR,     %RET_ADDR
    movq    rval(%SP),      %4
    movq    %4,             n(%FP)
    addq    8 * (3 + 2),    %SP,        %SP

    # return n + 1;
    movq    n(%FP),         %4
    addq    1,              %4,         %4
    movq    %4,             rval(%FP)
    jmp     .main.ret

.main.ret
    // epilogue
    movq    %FP,            %SP
    movq    fp(%SP),        %FP
    movq    ret(%SP),       %RET_ADDR
    ret     %RET_ADDR

    /*
     * function: int foo(int a, int b)
     */
foo:
    // prologue
    movq    %RET_ADDR,      ret(%SP)
    movq    %FP,            fp(%SP)
    movq    %SP,            %FP
    subq    8 * 0,          %SP,            %SP

    .equ    a,              fparam0
    .equ    b,              fparam1

    // body

    # return a + b
    movq    a(%FP),         %4
    movq    b(%FP),         %5
    addq    %4,             %5,         %4
    movq    %4,             rval(%FP)


    // epilogue
    movq    %FP,            %SP
    movq    fp(%SP),        %FP
    movq    ret(%SP),       %RET_ADDR
    ret     %RET_ADDR

Using the Stack for Parameters and the Return Value

Functions receive copies of the arguments on the stack and return a value on the stack. Consider a function that receives just two arguments. Then the callee expects in its prologue the following stack:

Directives for Reserved Registers

The calling convention relies on a stack pointer (SP), a frame pointer (FP) and a register for storing the return address (RETADDR_):

1
2
3
    .equ    FP,         1
    .equ    SP,         2
    .equ    RET_ADDR,   3

Directives for Offsets

The following directives define symbols for the stack offsets. Depending on the contex these offsets are either releative to SP or FP:

1
2
3
4
5
6
7
8
    .equ    ret,        0
    .equ    fp,         ret + 8
    .equ    rval,       fp + 8
    .equ    fparam0,    rval + 8
    .equ    fparam1,    fparam0 + 8

    .equ    local0,     -8
    .equ    local1,     local0 - 8

Calling a function

Lets assume that every parameter has a size of 8 bytes. Then the caller first has to decrement the stack pointer by \(8 \cdot (3 + \text{"number of parameters"})\). Before the call the parameters are copied to the stack. Here an example for a function call foo(1, 2):

1
2
3
4
5
6
7
8
    subq    5 * 8,      %SP,        %SP
    ldzwq   1,          %4
    movq    %4,         24(%SP)             // store arg0
    ldzwq   2,          %4
    movq    %4,         32(%SP)             // store arg1
    ldzwq   foo,        %4
    jmp     %4,         %RET
    addq    5 * 8,      %SP,        %SP

Using the directives this can be rewritten more readable as

1
2
3
4
5
6
7
8
    subq    5 * 8,      %SP,        %SP
    ldzwq   1,          %4
    movq    %4,         fparam0(%SP)        // store arg0
    ldzwq   2,          %4
    movq    %4,         fparam1(%SP)        // store arg1
    ldzwq   foo,        %4
    jmp     %4,         %RET
    addq    5 * 8,      %SP,        %SP

Implementation of a functio

The prologue and epilogue are the same a for subprograms or procedure. Assume the function uses two local variables local0 and local1 then after the prologue

1
2
3
4
5
    // function prologue (with 2 local variables, each 8 bytes)
    movq    %RET_ADDR,  ret(%SP)
    movq    %FP,        fp(%SP)
    addq    0,          %SP,        %FP
    subq    8 * 2,      %SP,        %SP

the stack can be described by

Using the directives for offset all relevant memory locations can be accessed relative to the frame pointer in the same uniform way

Skeleton of a Function Implementation

In the following skeleton replace FUNC_OR_PROC_LABEL with the function or procedure name, and replace NUM_LOCALS with the number of local variables:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
FUNC_OR_PROC_LABEL:
    // function prologue
    movq        %RET_ADDR,      ret(%SP)
    movq        %FP,            fp(%SP)
    movq        %SP,            %FP

    // reserve space for local variables.
    subq        NUM_LOCALS*8,   %SP,            %SP

    // begin of the function body

    /*

        Implementation of the function or procedure

    */

    // end of the function body

    // function epilogue
    movq        %FP,            %SP
    movq        fp(%SP),        %FP
    movq        ret(%SP),       %RET_ADDR
    jmp         %RET_ADDR,      %0