From 52f28c38d0d1d7d6036495b3ecadf3cd82dd1e50 Mon Sep 17 00:00:00 2001 From: Lieuwe Rooijakkers Date: Mon, 19 Aug 2024 01:02:26 +0200 Subject: toilet: don't read whole file in memory --- src/toilet.c | 96 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/toilet.c b/src/toilet.c index de32003..74cafb2 100644 --- a/src/toilet.c +++ b/src/toilet.c @@ -1,9 +1,11 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -11,7 +13,7 @@ #include "util/debug.h" #include "util/error.h" -#include "util/loop_files.h" +#include "util/loop_args.h" #include "util/map.h" #include "util/versie.h" @@ -73,66 +75,88 @@ static char** parse_options(int argc, char **argv, int *modeMap) { return argv + optind; } -static size_t get_count(enum MODE mode, struct filebuf *fb) { - switch (mode) { - case M_BYTES: - return fb->sz; +static size_t count_lines(char *fname, FILE *f) { + size_t nlines = 0; - case M_WORDS: { -#define IN(i) (i < fb->sz) + { + char *line = NULL; + size_t linen = 0; + while ((errno = 0, getline(&line, &linen, f)) != -1) { + nlines++; + } + free(line); + } - size_t words = 0; + if (errno != 0) { + printf("toilet: fout bij lezen uit bestand '%s'\n", fname); + exit(1); + } - // (c) Tom Forging - for (size_t i = 0; IN(i);) { - size_t previ = i; - while (IN(i) && !isspace(fb->buf[i])) i++; - words += i != previ; - while (IN(i) && isspace(fb->buf[i])) i++; - } + rewind(f); + return nlines; +} - return words; +static size_t count_words(FILE *f) { +#define BUF_SIZE 4096 -#undef IN + size_t nwords = 0; + static char buf[BUF_SIZE]; + + while (!feof(f)) { + const size_t n = fread(buf, 1, BUF_SIZE, f); + +#define IN(i) (i < n) + for (size_t i = 0; IN(i);) { + size_t previ = i; + while (IN(i) && !isspace(buf[i])) i++; + nwords += i != previ; + while (IN(i) && isspace(buf[i])) i++; } +#undef IN - case M_LINES: { - size_t lines = 0; - size_t i = 0; + } - while (i != fb->sz) { - if (fb->buf[i] == '\n') lines++; - i++; - } + rewind(f); + return nwords; - // handle case if file does not have trailing newline - if (fb->buf[i - 1] != '\n') { - lines++; - } +#undef BUF_SIZE +} - return lines; +static size_t get_count(enum MODE mode, char *fname, FILE *f) { + switch (mode) { + case M_BYTES: { + fseek(f, 0, SEEK_END); + long offset = ftell(f); + rewind(f); + return offset; } + case M_WORDS: + return count_words(f); + + case M_LINES: + return count_lines(fname, f); + default: assert(false); } } -static int process(struct filebuf *fb, char *fname, bool) { +static int process(char *fname, bool isstdin) { + FILE *f = isstdin ? stdin : fopen(fname, "r"); + for (enum MODE mode = 1; mode <= M_BYTES; mode <<= 1) { if (mode & modeMap) { - const size_t count = get_count(mode, fb); + const size_t count = get_count(mode, fname, f); printf("%li ", count); } } printf("%s\n", fname); - free_filebuf(fb); + + if (!isstdin) fclose(f); return 0; } -// TODO: be smarter, toilet doesn't have to read the whole file in memory (for -// unmappable files) - int entry_toilet(int argc, char **argv) { modeMap = 0; char **args = parse_options(argc, argv, &modeMap); @@ -140,5 +164,5 @@ int entry_toilet(int argc, char **argv) { modeMap = INT_MAX; } - return loop_files(args, process); + return loop_args(args, process); } -- cgit v1.2.3-70-g09d2