CBE Pt. 16: Simple File I/O

This is a minimal introduction to file I/O in C. It is limited to handling text files.

Open a File for Reading or Writing

Function fopen()

1
FILE *fopen(const char *filename, const char *mode);

declared in <stdio.h> can be used to open a file for reading or writing. Parameter filename is the path to a file (e.g. “foo.txt”, “../foo.txt”, “/usr/include/ar.h”). Using “w” for mode will attempt to open the file for writing and “r” for reading. For example

1
2
FILE *in = fopen("foo.txt", "r");
FILE *out = fopen("bar.txt", "w");

attempts to open file “foo.txt” for reading and file “bar.txt” for writing. On success fopen() returns an open file pointer. On failure it returns a null pointer. Opening a file for reading fails, for example, if the file does not exist. Opening a file for writing can fail if the file is a directory or cannot be created for other reasons (e.g. insufficient permissions).

Closing a File

Before your program terminates you should close the file with fclose():

1
int fclose(FILE *stream);

Operation for Reading and Writing

Function getline() (see the test program for unique strings in Session 17, Page 3)

1
ssize_t getline(char **lineptr, size_t *n, FILE *in);

can be used to read from a file by using an open file pointer in reading mode.

Function fprintf()

1
int fprintf(FILE *out, const char *format, ...);

can be used to write to a text file by using an open file pointer in write mode.

Generating Enum Constants

Consider this text file for some token kinds

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
EOI
BAD_TOKEN
HEX_LITERAL
OCT_LITERAL
DEC_LITERAL
PLUS
MINUS
ASTERISK
SLASH
PERCENT
EQUAL
LPAREN
RPAREN
SEMICOLON
CARET
IDENTIFIER

From this we want to generate the following type enum TokenKind and these enum constants:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
enum TokenKind
{
    EOI,
    BAD_TOKEN,
    HEX_LITERAL,
    OCT_LITERAL,
    DEC_LITERAL,
    PLUS,
    MINUS,
    ASTERISK,
    SLASH,
    PERCENT,
    EQUAL,
    LPAREN,
    RPAREN,
    SEMICOLON,
    CARET,
    IDENTIFIER,
};

This can be done with this program:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <stdlib.h>
#include <stdio.h>

/*
 * Write the opening of the generated enum definition to `out`:
 * the line "enum TokenKind" followed by a line holding the opening brace.
 */
void
printEnumHeader(FILE *out)
{
    fputs("enum TokenKind\n", out);
    fputs("{\n", out);
}

/*
 * Write the closing line of the generated enum definition ("};") to `out`.
 */
void
printEnumFooter(FILE *out)
{
    fputs("};\n", out);
}

/*
 * Print the expected command line to stderr and terminate the program
 * with exit status 1. `prg` is the program name shown in the message.
 * This function does not return.
 */
void
usage(const char *prg)
{
    FILE *const err = stderr;

    fprintf(err, "usage: %s input output\n", prg);
    exit(1);
}

/*
 * Read token names (one per line) from the text file argv[1] and write a
 * matching `enum TokenKind` definition to the file argv[2].
 *
 * Trailing line terminators are removed from each line and blank lines are
 * skipped, so they never produce an empty enum constant.
 *
 * Returns 0 on success and 1 if a file can not be opened or an I/O error
 * occurred. If the number of arguments is wrong, usage() is called, which
 * terminates the program.
 */
int
main(int argc, char *argv[])
{
    if (argc != 3) {
        usage(argv[0]);
    }

    FILE *in = fopen(argv[1], "r");
    if (!in) {
        fprintf(stderr, "can not open input file '%s'\n", argv[1]);
        return 1;
    }

    // Open the output only after the input is known to be readable, so that
    // a mistyped input name does not truncate an existing output file.
    FILE *outEnum = fopen(argv[2], "w");
    if (!outEnum) {
        fprintf(stderr, "can not open output file '%s'\n", argv[2]);
        fclose(in);
        return 1;
    }

    printEnumHeader(outEnum);

    char *line = NULL;
    size_t capacity = 0;
    ssize_t len;
    while ((len = getline(&line, &capacity, in)) > 0) {
        // Remove the line terminator only if there is one: the last line of
        // a file may end without '\n', and then no character must be cut.
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
            line[--len] = 0;
        }
        if (len == 0) {
            // A blank line would generate a lone ',' and break the enum.
            continue;
        }
        fprintf(outEnum, "%*s%s,\n", 4, "", line);
    }
    free(line);

    printEnumFooter(outEnum);

    int status = 0;

    // getline() returns -1 for end of file as well as for a read error;
    // ferror() tells the two cases apart.
    if (ferror(in)) {
        fprintf(stderr, "error while reading '%s'\n", argv[1]);
        status = 1;
    }
    fclose(in);

    // fclose() flushes buffered output, so a write error may only show up
    // here. The error flag has to be read before the stream is closed.
    int writeFailed = ferror(outEnum);
    if (fclose(outEnum) != 0 || writeFailed) {
        fprintf(stderr, "error while writing '%s'\n", argv[2]);
        status = 1;
    }

    return status;
}

Here is the demo:

theon$ gcc -Wall -o make_tokenkind make_tokenkind.c
theon$ ./make_tokenkind tokenkind.txt gen_tokenkind.h
theon$ cat gen_tokenkind.h
enum TokenKind
{
    EOI,
    BAD_TOKEN,
    HEX_LITERAL,
    OCT_LITERAL,
    DEC_LITERAL,
    PLUS,
    MINUS,
    ASTERISK,
    SLASH,
    PERCENT,
    EQUAL,
    LPAREN,
    RPAREN,
    SEMICOLON,
    CARET,
    IDENTIFIER,
};
theon$ 

Quiz 22: Generate strTokenKind()

Extend the program above so that it can be used as follows:

1
./make_tokenkind tokenkind.txt gen_tokenkind.h gen_strtokenkind.c

Compared to the program above a third output file can be specified to generate the source code for a function strTokenKind() that returns a string representation for an enum constant. For tokenkind.txt from above this would be the following:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
const char *
strTokenKind(enum TokenKind tokenKind)
{
    switch (tokenKind) {
        case EOI:
            return "EOI";
        case BAD_TOKEN:
            return "BAD_TOKEN";
        case HEX_LITERAL:
            return "HEX_LITERAL";
        case OCT_LITERAL:
            return "OCT_LITERAL";
        case DEC_LITERAL:
            return "DEC_LITERAL";
        case PLUS:
            return "PLUS";
        case MINUS:
            return "MINUS";
        case ASTERISK:
            return "ASTERISK";
        case SLASH:
            return "SLASH";
        case PERCENT:
            return "PERCENT";
        case EQUAL:
            return "EQUAL";
        case LPAREN:
            return "LPAREN";
        case RPAREN:
            return "RPAREN";
        case SEMICOLON:
            return "SEMICOLON";
        case CARET:
            return "CARET";
        case IDENTIFIER:
            return "IDENTIFIER";
        default:
            fprintf(stderr, "internal error in strTokenKind: tokenKind = %d\n",
                    tokenKind);
            exit(1);
            return "";
    }
}

Of course the generated source code should have proper indentation!

Submit your program with

1
submit hpc quiz22 make_tokenkind.c

Submit will compile your program and then generate output from an input file:

1
2
gcc -Werror -Wall -Wcast-qual -o make_tokenkind make_tokenkind.c
./make_tokenkind tokenkind.txt gen_tokenkind.h gen_strtokenkind.c

It will then try to generate an object file with

gcc -c tokenkind.c

where

1
2
3
4
5
6
7
8
#ifndef TOKENKIND_H
#define TOKENKIND_H

#include "gen_tokenkind.h"

const char *strTokenKind(enum TokenKind tokenKind);

#endif // TOKENKIND_H

and

1
2
3
4
5
6
#include <stdlib.h>
#include <stdio.h>

#include "tokenkind.h"

#include "gen_strtokenkind.c"

will be used in addition.