summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLieuwe Rooijakkers <lieuwerooijakkers@gmail.com>2024-08-19 01:02:26 +0200
committerLieuwe Rooijakkers <lieuwerooijakkers@gmail.com>2024-08-19 01:02:30 +0200
commit52f28c38d0d1d7d6036495b3ecadf3cd82dd1e50 (patch)
tree3718d8f1ee479b9181a8509d8f65dfa3b183f044
parent7298453390da42d4fcd21d2911e2d14b2187ceff (diff)
toilet: don't read whole file in memory
-rw-r--r--src/toilet.c96
1 files changed, 60 insertions, 36 deletions
diff --git a/src/toilet.c b/src/toilet.c
index de32003..74cafb2 100644
--- a/src/toilet.c
+++ b/src/toilet.c
@@ -1,9 +1,11 @@
#include <assert.h>
#include <ctype.h>
+#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <pwd.h>
#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
@@ -11,7 +13,7 @@
#include "util/debug.h"
#include "util/error.h"
-#include "util/loop_files.h"
+#include "util/loop_args.h"
#include "util/map.h"
#include "util/versie.h"
@@ -73,66 +75,88 @@ static char** parse_options(int argc, char **argv, int *modeMap) {
return argv + optind;
}
-static size_t get_count(enum MODE mode, struct filebuf *fb) {
- switch (mode) {
- case M_BYTES:
- return fb->sz;
+static size_t count_lines(char *fname, FILE *f) {
+ size_t nlines = 0;
- case M_WORDS: {
-#define IN(i) (i < fb->sz)
+ {
+ char *line = NULL;
+ size_t linen = 0;
+ while ((errno = 0, getline(&line, &linen, f)) != -1) {
+ nlines++;
+ }
+ free(line);
+ }
- size_t words = 0;
+ if (errno != 0) {
+ printf("toilet: fout bij lezen uit bestand '%s'\n", fname);
+ exit(1);
+ }
- // (c) Tom Forging
- for (size_t i = 0; IN(i);) {
- size_t previ = i;
- while (IN(i) && !isspace(fb->buf[i])) i++;
- words += i != previ;
- while (IN(i) && isspace(fb->buf[i])) i++;
- }
+ rewind(f);
+ return nlines;
+}
- return words;
+static size_t count_words(FILE *f) {
+#define BUF_SIZE 4096
-#undef IN
+ size_t nwords = 0;
+ static char buf[BUF_SIZE];
+
+ while (!feof(f)) {
+ const size_t n = fread(buf, 1, BUF_SIZE, f);
+
+#define IN(i) (i < n)
+ for (size_t i = 0; IN(i);) {
+ size_t previ = i;
+ while (IN(i) && !isspace(buf[i])) i++;
+ nwords += i != previ;
+ while (IN(i) && isspace(buf[i])) i++;
}
+#undef IN
- case M_LINES: {
- size_t lines = 0;
- size_t i = 0;
+ }
- while (i != fb->sz) {
- if (fb->buf[i] == '\n') lines++;
- i++;
- }
+ rewind(f);
+ return nwords;
- // handle case if file does not have trailing newline
- if (fb->buf[i - 1] != '\n') {
- lines++;
- }
+#undef BUF_SIZE
+}
- return lines;
+static size_t get_count(enum MODE mode, char *fname, FILE *f) {
+ switch (mode) {
+ case M_BYTES: {
+ fseek(f, 0, SEEK_END);
+ long offset = ftell(f);
+ rewind(f);
+ return offset;
}
+ case M_WORDS:
+ return count_words(f);
+
+ case M_LINES:
+ return count_lines(fname, f);
+
default:
assert(false);
}
}
-static int process(struct filebuf *fb, char *fname, bool) {
+static int process(char *fname, bool isstdin) {
+ FILE *f = isstdin ? stdin : fopen(fname, "r");
+
for (enum MODE mode = 1; mode <= M_BYTES; mode <<= 1) {
if (mode & modeMap) {
- const size_t count = get_count(mode, fb);
+ const size_t count = get_count(mode, fname, f);
printf("%li ", count);
}
}
printf("%s\n", fname);
- free_filebuf(fb);
+
+ if (!isstdin) fclose(f);
return 0;
}
-// TODO: be smarter, toilet doesn't have to read the whole file in memory (for
-// unmappable files)
-
int entry_toilet(int argc, char **argv) {
modeMap = 0;
char **args = parse_options(argc, argv, &modeMap);
@@ -140,5 +164,5 @@ int entry_toilet(int argc, char **argv) {
modeMap = INT_MAX;
}
- return loop_files(args, process);
+ return loop_args(args, process);
}