CPW Part 12: Interface for Code Generation

If you are using my makefile from Session 21, Page 2 be sure that you are using tha latest version. In a first versions all source files with the prefix “gen” were considered as generated source files. In the updated version only source files with the prefix “gen_” are considered as generated files. This is important as the source file “gen.c” will be used for implementing an interface for code generation.

Underlying ULM Instruction Set to Use

Use the ULM Instruction Set from Session 14, Page 4 extended for an idivq instruction where all operands are registers. Here the isa.txt:

session14/stack/0_ulm_variants/stack/isa.txt

RRR     (OP u 8) (X u 8) (Y u 8) (Z u 8)
RSR     (OP u 8) (X u 8) (Y s 8) (Z u 8)
J26     (OP u 8) (XYZ j 24)
U16R    (OP u 8) (XY u 16) (Z u 8)
J18R    (OP u 8) (XY j 16) (Z u 8)


0x01    RRR
:   halt    %X
    ulm_halt(ulm_regVal(X));

0x02    RRR
:   getc    %X
    ulm_setReg(ulm_readChar() & 0xFF, X);

0x03    RRR
:   putc    %X
    ulm_printChar(ulm_regVal(X) & 0xFF);

0x04    J26
:   jmp XYZ
    ulm_unconditionalRelJump(XYZ);

0x05    RRR
:   subq    X, %Y, %Z
    ulm_sub64(X, ulm_regVal(Y), Z);

0x06    J26
:   jnz XYZ
:   jne XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 0, XYZ);

0x07    J26
:   jz  XYZ
:   je  XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 1, XYZ);

0x08    U16R
:   ldzwq   XY, %Z
    ulm_setReg(XY, Z);

0x09    RSR
:   movzbq Y(%X), %Z
:   movzbq (%X), %Z
    ulm_fetch64(Y, X, 0, 0, ULM_ZERO_EXT, 1, Z);

0x0A    RRR
:   addq    X, %Y, %Z
    ulm_add64(X, ulm_regVal(Y), Z);

0x0B    RRR
:   imulq   %X, %Y, %Z
    ulm_mul64(ulm_regVal(X), ulm_regVal(Y), Z);

0x0C    J26
:   ja      XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_ZF] == 0
                        && ulm_statusReg[ULM_CF] == 0, XYZ);

0x0D    J26
:   jb      XYZ
    ulm_conditionalRelJump(ulm_statusReg[ULM_CF] == 1, XYZ);

0x0E    RRR
:   addq    %X, %Y, %Z
:   movq    %X, %Z
    ulm_add64(ulm_regVal(X), ulm_regVal(Y), Z);

0x0F    RRR
:   imulq   X, %Y, %Z
    ulm_mul64(X, ulm_regVal(Y), Z);

0x10    RRR
#   Uses the 128-bit unsigned integer divider. The 64-bits stored in %Y are
#   therefore zero extended with %0 to 128 bits and then used as numerator.
#   The immediate value X is used as unsigned denominator.
#
#   The result of the operation is the quotient and the remainder. These results
#   will be stored in a register pair: The quotient is stored in %Z and the
#   remainder in %{Z + 1}.
:   divq    X, %Y, %Z
    ulm_div128(X, ulm_regVal(Y), ulm_regVal(0), Z, 0, Z + 1);

0x11    RSR
#   The least significant byte in %X gets stored at address %Z.
:   movb    %X, Y(%Z)
:   movb    %X, (%Z)
    ulm_store64(Y, Z, 0, 0, 1, X);

0x12    RSR
#   Fetches a quad word from address %X into register %Z
:   movq    Y(%X), %Z
:   movq    (%X), %Z
    ulm_fetch64(Y, X, 0, 0, ULM_ZERO_EXT, 8, Z);

0x13    RRR
:   putc    X
    ulm_printChar(X & 0xFF);

#   void ulm_absJump(uint64_t destAddr, ulm_Reg ret);
0x14    RRR
:   jmp     %X,     %Y
:   call    %X,     %Y
:   ret     %X
    ulm_absJump(ulm_regVal(X), Y);

0x15    U16R
:   shldwq  XY, %Z
    ulm_setReg(ulm_regVal(Z) << 16 | XY, Z);

0x16    J18R
:   ldpa    XY, %Z
    ulm_setReg(ulm_ipVal() + XY, Z);

0x17    RRR
:   ldfp    Y(%X), %Z
:   ldfp    (%X), %Z
    ulm_fetch64(Y * 8, X, 0, 0, ULM_ZERO_EXT, 8, Z);

0x18    RRR
:   subq    %X, %Y, %Z
    ulm_sub64(ulm_regVal(X), ulm_regVal(Y), Z);

0x19    RSR
:   movq    %X, Y(%Z)
:   movq    %X, (%Z)
    ulm_store64(Y, Z, 0, 0, 8, X);

0x1A    RRR
#   Uses the 128-bit unsigned integer divider. The 64-bits stored in %Y are
#   therefore zero extended with %0 to 128 bits and then used as numerator.
#   The register value %X is used as 64-bit unsigned denominator.
#
#   The result of the operation is the quotient and the remainder. These results
#   will be stored in a register pair: The quotient is stored in %Z and the
#   remainder in %{Z + 1}.
:   divq    %X, %Y, %Z
    ulm_div128(ulm_regVal(X), ulm_regVal(Y), ulm_regVal(0), Z, 0, Z + 1);


# General meaning of the mnemonics

@addq
#   Integer addition.
@getc
#   Get character
@divq
#   64-bit unsigned integer division
@halt
#   Halt program
@imulq
#   64-bit unsigned and signed integer multiplication.
@ja
#   Jump if above (conditional jump).
@jb
#   Jump if below (conditional jump).
@je
#   Jump if equal (conditional jump).
@jmp
#   Jump (unconditional jump).
@jne
#   Jump if not equal (conditional jump).
@jnz
#   Jump if not zero (conditional jump).
@jz
#   Jump if zero (conditional jump).
@ldzwq
#   Load zero extended word to quad word register.
@putc
#   Put character
@subq
#   Integer subtraction.
@movb
#   Move a byte from a register to memory (store instruction).
@movzbq
#   Move zero extended byte to quad word register (fetch instruction).
@movq
#   Move quad word
@shldwq
#   Shift (destination register) and load word
@ldpa
#   Load Pool Address
@ldfp
#   Load from Pool

Initial Interface

Below the initial interface for code generation. In the video you saw how parts of it can be implemented.

session22/gen.h

#ifndef ABC_GEN_H
#define ABC_GEN_H

#include <stdint.h>
#include <stdio.h>

typedef uint8_t GenReg;

// before the interface can be used an output needs to be specified
void genSetOutput(FILE *out);

// header / footer
void genHeader(void);
void genFooter(void);

// set active segment
void genText(void);
void genData(void);
void genBSS(void);

// generate data
void genLabeledUInt(const char *label, uint64_t val);

// acquire / release register
GenReg genGetReg(void);
void genUngetReg(GenReg reg);

// load literal into register
void genLoadUInt(uint64_t val, GenReg reg);
void genLoadLabel(const char *label, GenReg reg);

// fetch / store quad word (8 bytes)
void genFetch(GenReg addr, GenReg dest);
void genStore(GenReg src, GenReg addr);

enum GenOp
{
    GEN_OP3R_BEGIN,
    GEN_ADD_R = GEN_OP3R_BEGIN,
    GEN_OP3R_END,

    GEN_OP3I_BEGIN = GEN_OP3R_END,
    GEN_ADD_I = GEN_OP3I_BEGIN,
    GEN_OP3I_END,
};

// 3 address instructions
void genOp3r(enum GenOp op, GenReg reg0, GenReg reg1, GenReg reg2);
void genOp3i(enum GenOp op, uint64_t val, GenReg reg1, GenReg reg2);

#endif // ABC_GEN_H

Implement the interface or extend the implementation as follows:

With genSetOutput() it is possible to specify an open file pointer to which all generated code will be written. It will be necessary to call genSetOutput() to use the interface.

Define in the implementation a global variable for the file pointer used by the interface, e.g.
1
static FILE *out;
With genSetOutput() this file pointer can be set. All internal print functions should write to this file pointer. In the test program you can call genSetOutput(stdout) or you can practise handling of program arguments and file handling ;-)
Change genGetReg() so that only registers with an id of 6 or larger can be acquired. For supporting interger division it will be convenient to reserve register %4 and %5 for internal usage.
In general labels are 64 bit literals. So genLoadLabel() needs to be changed. For keeping it simple we will not use literal pools however. Instead we use the word operators @w[0-3]. Calling genLoadLabel(“foo”, 42)_ should produce the code (without the comment):
1 2 3 4 5
# produced by genLoadLabel("foo", 42) ldzwq @w3(foo), %42 shldwq @w2(foo), %42 shldwq @w1(foo), %42 shldwq @w0(foo), %42
Also genLoadUInt() should be able to load 64-bit literals into a register. Compared to labels this can be supported more efficient. Unsigned integer values in the range \([0, 2^{16})\) can be loaded with a single ldzwq instruction. For example:
1 2
# produced by genLoadUInt(0xFFFF, 42) ldzwq 0xFFFF, %42
If the values are in the range \([2^{16}, 2^{32})\) two instructions can be used. For example:
1 2 3
# produced by genLoadUInt(0x1234FFFF, 42) ldzwq @w1(0x1234FFFF), %42 shldwq @w0(0x1234FFFF), %42
Analogously all 64-bit literals can be handled.
If in the call of genOp3i() the immediate value is out of range then the implementation should acquire a register, produce an instruction to load the immediate value into this register and then call genOp3r() to produce a corresponding instruction that has only register operands. Of course acquired registers need to be released.

genOp3r() and genOp3i() should support integer operation for addition, subtraction, multiplication and modulo. Change the enum GenOp type to

enum GenOp
{
    GEN_OP3R_BEGIN,
    GEN_ADD_R = GEN_OP3R_BEGIN,   // addition
    GEN_SUB_R,                    // subtraction
    GEN_IMUL_R,                   // multiplication
    GEN_DIV_R,                    // division
    GEN_MOD_R,                    // modulo
    GEN_OP3R_END,

    GEN_OP3I_BEGIN = GEN_OP3R_END,
    GEN_ADD_I = GEN_OP3I_BEGIN,
    GEN_SUB_I,
    GEN_IMUL_I,
    GEN_DIV_I,
    GEN_MOD_I,
    GEN_OP3I_END,
};

Integer subtraction and multiplication can be implemented analogously to addition. Subtraction can be mapped to the instruction with mnemonic subq and multiplication to the instruction with mnemonic imulq. For example:

# produced by genOp3r(GEN_SUB_R, 6, 7, 9) 
    subq %6, %7, %9

And

# produced by genOp3i(GEN_MUL_I, 6, 7, 9) 
    imulq 6, %7, %9

The operations for division and modulo require some special treatment. They both can be mapped to an instruction with mnemonic divq. The corresponding instruction always computes both, the quotient and the remainder of the division. And therefore actually a register pairs needs to be acquired for this operations. For avoiding the treatment of this special case let the instruction store the result in the register pair %4 and %5. Afterwards copy from one of these registers what is needed into the destination register. For example:

# produced by genOp3r(GEN_DIV_R, 6, 7, 9) 
    divq %6, %7, %4
    movq %4, %9

And

# produced by genOp3i(GEN_MOD_R, 6, 7, 9) 
    divq 6, %7, %4
    movq %5, %9

This certainly can be optimized. But in a first approach a working solution is sufficient, so keep it simple.

Quiz 25

Submit your solution with

1	submit hpc quiz25 gen.h gen.c finalize.h finalize.c

The underlying test is eventually to harsh. It for example requires that genGetReg() always returns the smallest register that is unused. This is actual not a requirement, just one why to fulfill the requirements. However, this makes it easier for us to test your code.