summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2025-02-16 20:03:40 +0100
committerTom Smeding <tom@tomsmeding.com>2025-02-16 20:03:40 +0100
commit39148b1b6dc6c7a10f7e713ece084b6d5cf27537 (patch)
tree619c9d951b1356bfc8dcaba45a66166972992170
Initial
-rw-r--r--.gitignore5
-rw-r--r--Makefile38
-rw-r--r--filter.c115
-rw-r--r--filter.h29
-rw-r--r--main.c313
-rw-r--r--options.c95
-rw-r--r--options.h22
-rw-r--r--string.c41
-rw-r--r--string.h19
-rw-r--r--tree.c104
-rw-r--r--tree.h39
11 files changed, 820 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4b50199
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.obj/
+.ccls-cache/
+fs-sample
+
+bak-options.txt
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9dce218
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,38 @@
+# DEBUG := 1
+
+CC := gcc
+CFLAGS := -Wall -Wextra -std=c11
+LDFLAGS :=
+ifneq ($(DEBUG),)
+ CFLAGS += -g -fsanitize=address
+ LDFLAGS += -fsanitize=address
+else
+ CFLAGS += -O2
+endif
+
+# stpcpy(3), stat stuff
+CFLAGS += -D_POSIX_C_SOURCE=200809L -D_DEFAULT_SOURCE
+
+OBJDIR := .obj
+TARGET := fs-sample
+
+SOURCES := $(wildcard *.c)
+HEADERS := $(wildcard *.h)
+OBJECTS := $(patsubst %.c,$(OBJDIR)/%.o,$(SOURCES))
+
+.PHONY: all clean
+
+all: $(TARGET)
+
+clean:
+ rm -f $(TARGET)
+ rm -rf $(OBJDIR)
+
+$(TARGET): $(OBJECTS)
+ $(CC) -o $@ $^ $(LDFLAGS)
+
+$(OBJECTS): $(OBJDIR)/%.o: %.c $(HEADERS) | $(OBJDIR)
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+$(OBJDIR):
+ mkdir $@
diff --git a/filter.c b/filter.c
new file mode 100644
index 0000000..312704a
--- /dev/null
+++ b/filter.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+#include "filter.h"
+
+
+struct filter_rule parse_exclude_rule(const char *rule) {
+ const size_t len = strlen(rule);
+
+ const bool open_left = rule[0] == '*';
+ if (open_left) {
+ if (len < 4 || memcmp(rule, "**/", 3) != 0) {
+ fprintf(stderr, "Error: A filter rule starting with '*' must start with '**/'.\n");
+ exit(1);
+ }
+ } else {
+ if (rule[0] != '/') {
+ fprintf(stderr, "Error: A filter rule with a non-wildcard left-hand side must start with '/'.\n");
+ exit(1);
+ }
+ }
+
+ const bool open_right = len > 0 && rule[len - 1] == '*';
+ if (open_right) {
+ if (len < 4 || memcmp(rule + len - 3, "/**", 3) != 0) {
+ fprintf(stderr, "Error: A filter rule ending with '*' must end with '/**'.\n");
+ exit(1);
+ }
+ }
+
+ const size_t str_start = open_left ? 3 : 1;
+ const size_t str_len = len - str_start - (open_right ? 3 : 0);
+
+ for (size_t i = 0; i < str_len; i++) {
+ if (rule[str_start + i] == '*') {
+ fprintf(stderr, "Error: Wildcards in the middle of a pattern unsupported.\n");
+ exit(1);
+ }
+ }
+
+ char *str = malloc(str_len + 1);
+ memcpy(str, rule + str_start, str_len);
+ str[str_len] = '\0';
+
+ return (struct filter_rule){
+ .ftype = FILTER_EXCLUDE,
+ .ptype = open_left && open_right ? PATTERN_INFIX
+ : open_left ? PATTERN_SUFFIX
+ : open_right ? PATTERN_PREFIX
+ : PATTERN_EQUAL,
+ .str = str,
+ .len = str_len,
+ };
+}
+
+bool filter_rule_allows(const struct filter_rule rule, const char *path) {
+ assert(rule.ftype == FILTER_EXCLUDE);
+
+ switch (rule.ptype) {
+ case PATTERN_EQUAL:
+ if (strcmp(path, rule.str) == 0) return false;
+ break;
+
+ case PATTERN_PREFIX:
+ if (strncmp(path, rule.str, rule.len) == 0 &&
+ (path[rule.len] == '/' || path[rule.len] == '\0'))
+ return false;
+ break;
+
+ case PATTERN_SUFFIX: {
+ const size_t pathlen = strlen(path);
+ if (pathlen >= rule.len &&
+ memcmp(path + pathlen - rule.len, rule.str, rule.len) == 0 &&
+ (pathlen == rule.len || path[pathlen - rule.len - 1] == '/'))
+ return false;
+ break;
+ }
+
+ case PATTERN_INFIX: {
+ const size_t pathlen = strlen(path);
+ if (strncmp(path, rule.str, rule.len) == 0 &&
+ (path[rule.len] == '\0' || path[rule.len] == '/')) return false;
+ for (size_t i = 0; i < pathlen; i++) {
+ if (path[i] == '/') {
+ if (strncmp(path + i+1, rule.str, rule.len) == 0 &&
+ (path[i+1 + rule.len] == '\0' || path[i+1 + rule.len] == '/')) return false;
+ }
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+void filter_rule_debugdump(FILE *stream, const struct filter_rule rule) {
+ const char *ftype_str;
+ switch (rule.ftype) {
+ case FILTER_EXCLUDE: ftype_str = "exclude"; break;
+ default: ftype_str = "?unknown_ftype"; break;
+ }
+
+ const char *ptype_str;
+ switch (rule.ptype) {
+ case PATTERN_EQUAL: ptype_str = "equal"; break;
+ case PATTERN_PREFIX: ptype_str = "prefix"; break;
+ case PATTERN_SUFFIX: ptype_str = "suffix"; break;
+ case PATTERN_INFIX: ptype_str = "infix"; break;
+ default: ptype_str = "?unknown_ptype"; break;
+ }
+
+ fprintf(stream, "%s %s <%s>\n", ftype_str, ptype_str, rule.str);
+}
diff --git a/filter.h b/filter.h
new file mode 100644
index 0000000..2bc41e1
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,29 @@
+#pragma once
+
+
+enum filter_type {
+ // FILTER_INCLUDE, // unsupported for now
+ FILTER_EXCLUDE,
+};
+
+// A subset of borg patterns
+enum pattern_type {
+ PATTERN_EQUAL, // /str
+ PATTERN_PREFIX, // /str/**
+ PATTERN_SUFFIX, // **/str
+ PATTERN_INFIX, // **/str/**
+};
+
+struct filter_rule {
+ enum filter_type ftype;
+ enum pattern_type ptype;
+ char *str;
+ size_t len; // length of str (strlen, but cached)
+};
+
+struct filter_rule parse_exclude_rule(const char *rule);
+
+// The path must be relative to the tree root (and thus not start with a '/').
+bool filter_rule_allows(const struct filter_rule rule, const char *path);
+
+void filter_rule_debugdump(FILE *stream, const struct filter_rule rule);
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..a326c2b
--- /dev/null
+++ b/main.c
@@ -0,0 +1,313 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "options.h"
+#include "string.h"
+#include "tree.h"
+
+
+static const char* describe_stat(mode_t mode) {
+ if (S_ISREG(mode)) return "file";
+ if (S_ISDIR(mode)) return "directory";
+ if (S_ISCHR(mode)) return "character device";
+ if (S_ISBLK(mode)) return "block device";
+ if (S_ISFIFO(mode)) return "FIFO";
+ if (S_ISLNK(mode)) return "symbolic link";
+ if (S_ISSOCK(mode)) return "socket";
+ return "?unknown";
+}
+
+static const char* describe_dirent(unsigned char type) {
+ switch (type) {
+ case DT_BLK: return "block device";
+ case DT_CHR: return "character device";
+ case DT_DIR: return "directory";
+ case DT_FIFO: return "FIFO";
+ case DT_LNK: return "symbolic link";
+ case DT_REG: return "file";
+ case DT_SOCK: return "socket";
+ case DT_UNKNOWN: return "unknown directory entry";
+ default: return "?unknown";
+ }
+}
+
+static int sort_pairs__compar(const void *p1_, const void *p2_) {
+ const struct tree_pair *p1 = p1_, *p2 = p2_;
+
+ // directories go first
+ if ((p1->sub != NULL) != (p2->sub != NULL))
+ return p1->sub != NULL ? -1 : 1;
+
+ // directories with more things in it go first
+ if (p1->sub != NULL) {
+ const size_t tot1 = p1->sub->total_files + p1->sub->total_dirs;
+ const size_t tot2 = p2->sub->total_files + p2->sub->total_dirs;
+ if (tot1 != tot2) return tot2 - tot1;
+ }
+
+ return strcmp(p1->name, p2->name);
+}
+
+static void sort_pairs(struct tree_pair *pairs, size_t num) {
+ qsort(pairs, num, sizeof(struct tree_pair), sort_pairs__compar);
+}
+
+struct linked_dir_node {
+ char *name;
+ struct tree_node *sub;
+ struct linked_dir_node *next;
+};
+
+// If this directory contained a cachetag, this_was_cache_dir is set to true
+// and NULL is returned.
+static struct tree_node* make_dir_tree(const struct options *opts, struct string *relpath, int dirfd, bool *this_was_cache_dir) {
+ // fprintf(stderr, "make_dir_tree(%s)\n", string_read(*relpath));
+
+ DIR *dir = fdopendir(dirfd);
+ if (dir == NULL) {
+ if (errno == EACCES) {
+ fprintf(stderr, "Warning: permission denied (fdopendir): %s\n", string_read(*relpath));
+ return tree_make(NULL, 0, 0);
+ }
+ perror("fdopendir");
+ exit(1);
+ }
+
+ struct linked_dir_node *first = NULL;
+ struct linked_dir_node *last = NULL;
+ size_t num_dir_nodes = 0;
+
+ size_t disk_size = 0;
+
+ const size_t dirpathlen = relpath->len;
+
+ while (true) {
+ errno = 0;
+ struct dirent *ent = readdir(dir);
+ if (ent == NULL) {
+ if (errno == 0) break; // end of directory listing
+ perror("readdir");
+ exit(1);
+ }
+
+ if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
+ continue;
+ }
+
+ for (size_t i = 0; i < opts->ncachetags; i++) {
+ if (strcmp(ent->d_name, opts->cachetags[i]) == 0) {
+ *this_was_cache_dir = true;
+ while (first != NULL) {
+ free(first->name);
+ if (first->sub != NULL) tree_free(first->sub);
+ struct linked_dir_node *next = first->next;
+ free(first);
+ first = next;
+ }
+ closedir(dir);
+ return NULL;
+ }
+ }
+
+ // Do the truncate here, not on loop continue, because there's many
+ // continue points and I'd forget.
+ string_truncate(relpath, dirpathlen);
+ if (dirpathlen > 0) string_append(relpath, "/");
+ string_append(relpath, ent->d_name);
+
+ for (size_t i = 0; i < opts->nfilters; i++) {
+ if (!filter_rule_allows(opts->filters[i], string_read(*relpath))) {
+ if (opts->debug)
+ fprintf(stderr, "Excluded: <%s>\n", string_read(*relpath));
+ goto skip_this_dirent;
+ }
+ }
+
+ struct stat st;
+ bool have_st = false;
+ bool is_dir;
+
+ if (ent->d_type == DT_UNKNOWN) {
+ int ret = fstatat(dirfd, ent->d_name, &st, 0);
+ if (ret < 0) {
+ if (errno == EACCES) {
+ fprintf(stderr, "Warning: permission denied (fstatat): %s\n", string_read(*relpath));
+ continue;
+ }
+ perror("fstatat");
+ exit(1);
+ }
+
+ have_st = true;
+ if (S_ISDIR(st.st_mode)) is_dir = true;
+ else if (S_ISREG(st.st_mode)) is_dir = false;
+ else {
+ if (!S_ISLNK(st.st_mode))
+ fprintf(stderr, "Warning: skipping %s: %s\n", describe_stat(st.st_mode), string_read(*relpath));
+ continue;
+ }
+ } else if (ent->d_type == DT_REG) {
+ is_dir = false;
+ } else if (ent->d_type == DT_DIR) {
+ is_dir = true;
+ } else {
+ if (ent->d_type != DT_LNK)
+ fprintf(stderr, "Warning: skipping %s: %s\n", describe_dirent(ent->d_type), string_read(*relpath));
+ continue;
+ }
+
+ struct tree_node *sub = NULL;
+
+ if (is_dir) {
+ int fd = openat(dirfd, ent->d_name, O_DIRECTORY);
+ if (fd < 0) {
+ if (errno == EACCES) {
+ fprintf(stderr, "Warning: permission denied (openat): %s\n", string_read(*relpath));
+ continue;
+ }
+ perror("openat");
+ exit(1);
+ }
+
+ bool was_cache_dir = false;
+ sub = make_dir_tree(opts, relpath, fd, &was_cache_dir);
+ close(fd);
+
+ if (was_cache_dir) { assert(sub == NULL); goto skip_this_dirent; } // no need to free 'sub'
+ } else {
+ if (!have_st) {
+ int ret = fstatat(dirfd, ent->d_name, &st, 0);
+ if (ret < 0) {
+ if (errno == EACCES) {
+ fprintf(stderr, "Warning: permission denied (fstatat): %s\n", string_read(*relpath));
+ continue;
+ }
+ perror("fstatat");
+ exit(1);
+ }
+ have_st = true;
+ }
+ disk_size += (st.st_size + st.st_blksize - 1) / st.st_blksize * st.st_blksize;
+ }
+
+ struct linked_dir_node *lnode = malloc(sizeof(struct linked_dir_node));
+ lnode->name = strdup(ent->d_name);
+ lnode->sub = sub;
+ lnode->next = NULL;
+
+ if (last == NULL) first = last = lnode;
+ else { last->next = lnode; last = lnode; }
+ num_dir_nodes++;
+
+ skip_this_dirent:
+ ;
+ }
+
+ string_truncate(relpath, dirpathlen);
+
+ if (last != NULL) disk_size += 4096; // add 4K for a non-empty directory node
+
+ closedir(dir);
+
+ struct tree_pair *pairs = malloc(num_dir_nodes * sizeof(struct tree_pair));
+ {
+ struct linked_dir_node *lnode = first;
+ for (size_t i = 0; i < num_dir_nodes; i++) {
+ pairs[i].name = lnode->name;
+ pairs[i].sub = lnode->sub;
+ lnode = lnode->next;
+ }
+ }
+
+ sort_pairs(pairs, num_dir_nodes);
+
+ struct tree_node *result = tree_make(pairs, num_dir_nodes, disk_size);
+
+ while (first != NULL) {
+ free(first->name);
+ struct linked_dir_node *next = first->next;
+ free(first);
+ first = next;
+ }
+ free(pairs);
+
+ return result;
+}
+
+static struct tree_node* make_dir_tree_root(const struct options *opts, const char *rootpath) {
+ int fd = open(rootpath, O_DIRECTORY);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ struct string s = string_make("");
+
+ bool was_cache_dir = false;
+ struct tree_node *tree = make_dir_tree(opts, &s, fd, &was_cache_dir);
+ close(fd);
+
+ string_free(s);
+
+ if (was_cache_dir) {
+ fprintf(stderr, "Error: Root is a cache directory (contains a cache tag)\n");
+ exit(1);
+ }
+
+ return tree;
+}
+
+static void print_tree(struct tree_node *node, struct string *prefix, int maxdepth) {
+ if (maxdepth <= 0) return;
+
+ struct tree_iter iter = tree_iter_start(node);
+ struct tree_pair pair;
+ while (tree_iter_next(&iter, &pair)) {
+ if (pair.sub == NULL) {
+ // printf("%s- %s\n", string_read(*prefix), pair.name);
+ } else {
+ printf("%s- %s/: files=%zu dirs=%zu size=%zu\n",
+ string_read(*prefix), pair.name,
+ pair.sub->total_files, pair.sub->total_dirs, pair.sub->total_size);
+
+ const size_t origlen = string_append(prefix, " ");
+ print_tree(pair.sub, prefix, maxdepth - 1);
+ string_truncate(prefix, origlen);
+ }
+ }
+}
+
+int main(int argc, char **argv) {
+ if (argc <= 1) {
+ fprintf(stderr, "Usage: %s [options...] <root>\n", argv[0]);
+ return 1;
+ }
+
+ struct options opts = parse_options(argc, argv);
+
+ if (opts.debug) {
+ for (size_t i = 0; i < opts.nfilters; i++) {
+ fprintf(stderr, "FR: ");
+ filter_rule_debugdump(stderr, opts.filters[i]);
+ }
+ }
+
+ struct tree_node *root = make_dir_tree_root(&opts, opts.rootpath);
+
+ printf("files=%zu dirs=%zu size=%zu\n",
+ root->total_files, root->total_dirs, root->total_size);
+
+ struct string prefix = string_make("");
+ print_tree(root, &prefix, opts.print_depth);
+ string_free(prefix);
+
+ tree_free(root);
+}
diff --git a/options.c b/options.c
new file mode 100644
index 0000000..2619602
--- /dev/null
+++ b/options.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "options.h"
+
+
+struct options parse_options(int argc, char **argv) {
+ size_t filters_cap = 32, filters_len = 0;
+ struct filter_rule *filters = malloc(filters_cap * sizeof(struct filter_rule));
+
+ size_t cachetags_cap = 32, cachetags_len = 0;
+ char **cachetags = malloc(cachetags_cap * sizeof(char*));
+
+ char *rootpath = NULL;
+
+ int print_depth = 1;
+
+ bool debug = false;
+
+ for (int i = 1; i < argc; i++) {
+ if (argv[i][0] != '-') {
+ if (rootpath) {
+ fprintf(stderr, "Multiple root paths given\n");
+ exit(1);
+ }
+ rootpath = strdup(argv[i]);
+
+ } else if (strcmp(argv[i], "--exclude") == 0) {
+ if (i + 1 >= argc) {
+ fprintf(stderr, "'--exclude' takes an argument\n");
+ exit(1);
+ }
+
+ if (filters_len >= filters_cap) {
+ filters_cap *= 2;
+ filters = realloc(filters, filters_cap * sizeof(struct filter_rule));
+ }
+ struct filter_rule rule = parse_exclude_rule(argv[i+1]);
+ filters[filters_len++] = rule;
+
+ i++;
+
+ } else if (strcmp(argv[i], "--exclude-if-present") == 0) {
+ if (i + 1 >= argc) {
+ fprintf(stderr, "'--exclude-if-present' takes an argument\n");
+ exit(1);
+ }
+
+ if (cachetags_len >= cachetags_cap) {
+ cachetags_cap *= 2;
+ cachetags = realloc(cachetags, cachetags_cap * sizeof(char*));
+ }
+ cachetags[cachetags_len++] = strdup(argv[i+1]);
+
+ i++;
+
+ } else if (strcmp(argv[i], "-d") == 0) {
+ if (i + 1 >= argc) {
+ fprintf(stderr, "'-d' takes an argument\n");
+ exit(1);
+ }
+
+ char *endp;
+ print_depth = strtol(argv[i+1], &endp, 10);
+ if (!*argv[i+1] || *endp) {
+ fprintf(stderr, "Invalid argument to '-d': '%s'\n", argv[i+1]);
+ exit(1);
+ }
+
+ i++;
+
+ } else if (strcmp(argv[i], "--debug") == 0) {
+ debug = true;
+
+ } else {
+ fprintf(stderr, "Unrecognised argument: '%s'\n", argv[i]);
+ exit(1);
+ }
+ }
+
+ if (rootpath == NULL) {
+ fprintf(stderr, "No root path provided\n");
+ exit(1);
+ }
+
+ return (struct options){
+ .nfilters = filters_len,
+ .filters = filters,
+ .ncachetags = cachetags_len,
+ .cachetags = cachetags,
+ .rootpath = rootpath,
+ .print_depth = print_depth,
+ .debug = debug,
+ };
+}
diff --git a/options.h b/options.h
new file mode 100644
index 0000000..f015c33
--- /dev/null
+++ b/options.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include "filter.h"
+
+
+struct options {
+ size_t nfilters;
+ struct filter_rule *filters;
+
+ size_t ncachetags;
+ char **cachetags;
+
+ char *rootpath;
+
+ int print_depth;
+
+ bool debug;
+};
+
+struct options parse_options(int argc, char **argv);
diff --git a/string.c b/string.c
new file mode 100644
index 0000000..b85b46a
--- /dev/null
+++ b/string.c
@@ -0,0 +1,41 @@
+#include <stdlib.h>
+#include <string.h>
+#include "string.h"
+
+
+struct string string_make(const char *str) {
+ struct string s;
+ s.len = strlen(str);
+ s.cap = s.len + 1;
+ s.data = malloc(s.cap);
+ memcpy(s.data, str, s.cap);
+ return s;
+}
+
+void string_free(struct string s) {
+ free(s.data);
+}
+
+char* string_read(struct string s) {
+ return s.data;
+}
+
+size_t string_append(struct string *s, char *arg) {
+ const size_t arglen = strlen(arg);
+ if (s->len + arglen + 1 > s->cap) {
+ size_t newcap = s->cap * 2;
+ while (s->len + arglen + 1 > newcap) newcap *= 2;
+ s->data = realloc(s->data, newcap);
+ }
+ memcpy(s->data + s->len, arg, arglen + 1);
+ const size_t origlen = s->len;
+ s->len += arglen;
+ return origlen;
+}
+
+void string_truncate(struct string *s, size_t length) {
+ if (length < s->len) {
+ s->len = length;
+ s->data[length] = '\0';
+ }
+}
diff --git a/string.h b/string.h
new file mode 100644
index 0000000..ecb49b6
--- /dev/null
+++ b/string.h
@@ -0,0 +1,19 @@
+#pragma once
+
+
+struct string {
+ size_t len, cap;
+ char *data; // null-terminated
+};
+
+struct string string_make(const char *str);
+
+void string_free(struct string s);
+
+// Do not modify the length of the string.
+char* string_read(struct string s);
+
+// Returns original length.
+size_t string_append(struct string *s, char *arg);
+
+void string_truncate(struct string *s, size_t length);
diff --git a/tree.c b/tree.c
new file mode 100644
index 0000000..830a886
--- /dev/null
+++ b/tree.c
@@ -0,0 +1,104 @@
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+#include "tree.h"
+
+
+struct tree_node* tree_make(const struct tree_pair *pairs, size_t npairs, size_t disk_size) {
+ size_t datalen = 0;
+ for (size_t i = 0; i < npairs; i++) {
+ datalen += strlen(pairs[i].name) + 1 + sizeof(struct tree_node*);
+ }
+ datalen += 1;
+
+ struct tree_node *const node = malloc(sizeof(struct tree_node));
+ node->data = malloc(datalen);
+
+ size_t total_files = 0;
+ size_t total_dirs = 0;
+ size_t total_size = disk_size;
+
+ size_t cursor = 0;
+ for (size_t i = 0; i < npairs; i++) {
+ if (pairs[i].sub == NULL) {
+ total_files++;
+ } else {
+ total_files += pairs[i].sub->total_files;
+ total_dirs += 1 + pairs[i].sub->total_dirs; // include this directory too
+ total_size += pairs[i].sub->total_size;
+ }
+
+ uint8_t *dst = node->data + cursor;
+ cursor += (uint8_t*)stpcpy((char*)dst, pairs[i].name) - dst;
+ cursor++;
+ *(struct tree_node**)(node->data + cursor) = pairs[i].sub;
+ cursor += sizeof(struct tree_node*);
+ }
+ node->data[cursor] = 0;
+
+ node->total_files = total_files;
+ node->total_dirs = total_dirs;
+ node->total_size = total_size;
+
+ return node;
+}
+
+#define LOOP_NODE(node_, namevar_, subvar_, ...) do { \
+ size_t _cursor = 0; \
+ while (true) { \
+ namevar_ = (char*)((node_)->data + _cursor); \
+ size_t _len = strlen((char*)((node_)->data + _cursor)); \
+ if (_len == 0) break; \
+ _cursor += _len + 1; \
+ subvar_ = *(struct tree_node**)((node_)->data + _cursor); \
+ _cursor += sizeof(struct tree_node*); \
+ { __VA_ARGS__ }; \
+ } \
+ } while (0)
+
+void tree_free(struct tree_node *node) {
+ char *name; (void)name;
+ struct tree_node *sub;
+ LOOP_NODE(node, name, sub, {
+ if (sub != NULL) tree_free(sub);
+ });
+ free(node->data);
+ free(node);
+}
+
+size_t tree_num_children(const struct tree_node *node) {
+ char *name; (void)name;
+ struct tree_node *sub; (void)sub;
+ size_t count = 0;
+ LOOP_NODE(node, name, sub, {
+ count++;
+ });
+ return count;
+}
+
+struct tree_pair tree_index(struct tree_node *node, size_t index) {
+ char *name;
+ struct tree_node *sub;
+ size_t count = 0;
+ LOOP_NODE(node, name, sub, {
+ if (count == index) return (struct tree_pair){.name = name, .sub = sub};
+ count++;
+ });
+ assert(false);
+}
+
+struct tree_iter tree_iter_start(struct tree_node *node) {
+ return (struct tree_iter){.node = node, .off = 0};
+}
+
+bool tree_iter_next(struct tree_iter *iter, struct tree_pair *dst) {
+ struct tree_node *node = iter->node;
+ if (node->data[iter->off] == 0) return false;
+
+ dst->name = (char*)(node->data + iter->off);
+ const size_t namelen = strlen(dst->name);
+ dst->sub = *(struct tree_node**)(node->data + iter->off + namelen + 1);
+ iter->off += namelen + 1 + sizeof(struct tree_node*);
+ return true;
+}
diff --git a/tree.h b/tree.h
new file mode 100644
index 0000000..86a9521
--- /dev/null
+++ b/tree.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+
+// The data field is a compact representation of a
+// [(string, struct tree_node*)] list. The strings are all non-empty.
+// Representation: for every list element, first a null-terminated sequence of
+// chars (the string), then sizeof(void*) bytes (the pointer, potentially
+// NULL). After the last list element, an additional 0 byte, which could be
+// interpreted as an empty string, but instead means the end of the list.
+struct tree_node {
+ size_t total_files;
+ size_t total_dirs;
+ size_t total_size; // sum of on-disk sizes (in bytes)
+ uint8_t *data;
+};
+
+// A single element in a child list.
+struct tree_pair {
+ const char *name; // ownership is retained by the producer of this tree_pair
+ struct tree_node *sub; // can be NULL
+};
+
+// The names are copied out of 'pairs'.
+struct tree_node* tree_make(const struct tree_pair *pairs, size_t npairs, size_t disk_size);
+
+// Frees recursively.
+void tree_free(struct tree_node *node);
+
+size_t tree_num_children(const struct tree_node *node);
+
+struct tree_pair tree_index(struct tree_node *node, size_t index);
+
+struct tree_iter { struct tree_node *node; size_t off; };
+struct tree_iter tree_iter_start(struct tree_node *node);
+// Returns whether there was another child
+bool tree_iter_next(struct tree_iter *iter, struct tree_pair *dst);