Data Structure for Unique Strings
Provided Material
Here the header file and the test program for the unique strings:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | #ifndef UTILS_USTR_H
#define UTILS_USTR_H
#include <stdbool.h>
#include <stddef.h>
struct UStr
{
size_t len;
char cstr[];
};
const struct UStr *UStrAdd_(const char *s, bool *added);
const struct UStr *UStrAdd(const char *s);
void UStrPrintPool(void);
#endif // UTILS_USTR_H
|
#include <stdlib.h>
#include <stdio.h>
#include "ustr.h"
int
main(void)
{
const struct UStr *kw[] = {
UStrAdd("if"),
UStrAdd("for"),
UStrAdd("while"),
};
char *line = 0;
size_t capacity = 0;
ssize_t len;
while ((len = getline(&line, &capacity, stdin)) > 0) {
line[len - 1] = 0;
bool added, error = false;
const struct UStr *identifier = UStrAdd_(line, &added);
for (size_t i = 0; i < sizeof(kw) / sizeof(kw[0]); ++i) {
if (kw[i] == identifier) {
printf("can not use keyword as identifier\n");
error = true;
}
}
if (error) {
continue;
}
if (added) {
printf("new ");
}
printf("identifier '%s'\n", identifier->cstr);
}
free(line); // ok even if line is null pointer
printf("List of unique strings:\n");
UStrPrintPool();
}
And here a skeleton for the source file ustr.c:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ustr.h"
/*
* List node for storing a unique string as element
*/
struct Node
{
struct Node *next;
struct UStr ustr;
};
/*
* list head: list points to the first list node
*/
static struct Node *list;
const struct UStr *
UStrAdd_(const char *s, bool *added)
{
size_t len = strlen(s);
/*
* if added is the null pointer it gets ignored. Otherwise initialized with
* true.
*/
if (added) {
*added = true;
}
/*
* Search in list if it already contains string s
*/
for (struct Node *n = list; n; n = n->next) {
/*
* TODO:
* - Check if node n contains the string
* - If this is the case:
* - If added is not the null pointer set *added to false
* - return a pointer to the unique string object stored in this node
*/
}
/*
* String needs to be added
*/
struct Node *n = 0; // <- TODO: Allocate a sufficiently large memory block
// for a node that can store a string element
// with length len.
// Do not forget the null byte!
if (!n) {
fprintf(stderr, "makeUStr: out of memory\n");
abort();
}
/*
* Initialize the unique string object stored in this node:
*/
n->ustr.len = len;
// TODO: Initialize also n->ustr.cstr by copying s into this buffer
/*
* TODO: Append the new node to the head of the list. I.e. the new node
* will become the new head of the list. The successor of the new
* node will be the old list head.
*/
/*
* Return a pointer to the unique string object
*/
return &n->ustr;
}
const struct UStr *
UStrAdd(const char *s)
{
return UStrAdd_(s, 0);
}
void
UStrPrintPool(void)
{
for (struct Node *n = list; n; n = n->next) {
printf("%s\n", n->ustr.cstr);
}
}
|
Allocating a List Node
In the header ustr.h we declare the following struct for a list element:
1 2 3 4 5 | struct UStr
{
size_t len;
char cstr[];
};
|
Note that the member cstr of this struct is an array of characters. That means it is the address of the first byte that follows the member len in memory. Assume you define a variable ustr of type struct UStr as follows
1 | struct UStr ustr;
|
then you can describe the memory layout with
For this you also see that the size of struct UStr is just the size of size_t.
The list node can be declared in the source file as follows:
1 2 3 4 5 | struct Node
{
struct Node *next;
struct UStr ustr;
};
|
Assume you would define
1 | struct Node node;
|
then the memory layout can be described with
If you use dynamic memory allocation like here
1 | struct Node *n = malloc(sizeof(*n));
|
we would have:
For storing some string in such a node we simply need to allocate some additional bytes. So let's allocate memory for a list node like that:
1 | struct Node *n = malloc(sizeof(*n) + 4);
|
then we have the following layout in memory:
We allocated memory for n->next and n->ustr.len and 4 addition bytes. Of course this 4 bytes now could now be used to store a string in this list node. The length of this string should not exceed 3 characters as we also have to store a terminating zero byte. So after
1 | strcpy(n->ustr.cstr, "Foo");
|
we would have the following memory layout:
Quiz 19: Implement the Unique String Class
Implement the unique string class. Submit your implementation with:
1 | submit hpc quiz19 ustr.c ustr.h xtest_ustr.c
|