// tabulate.cpp -- build text tables from delimiter-separated input lines.
//
// Provenance: recovered from a flattened git patch
//   commit  b006ae324da88e3280914b3d00585a740057d4c8
//   author  Tom Smeding
//   date    Thu, 16 Oct 2025 21:49:02 +0200
//   subject Initial
//   footer  cgit v1.2.3-70-g09d2
// The patch created four files (4 files changed, 334 insertions). The three
// non-C++ files are reproduced here so nothing from the patch is lost.
// In the recovered text all "<...>" spans (include names, template arguments,
// usage placeholders) were missing; they have been reconstructed below.
//
// .gitignore:
//   *.o
//   tabulate
//
// Makefile (recipe lines are tab-indented in a real Makefile):
//   CXX = g++
//   CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv
//   ifneq ($(DEBUG),)
//     CXXFLAGS += -g
//   else
//     CXXFLAGS += -O2
//   endif
//   BIN = tabulate
//
//   .PHONY: all clean
//
//   all: $(BIN)
//
//   clean:
//       rm -f $(BIN) *.o
//
//   $(BIN): $(patsubst %.cpp,%.o,$(wildcard *.cpp))
//       $(CXX) -o $@ $^
//
//   %.o: %.cpp $(wildcard *.h)
//       $(CXX) $(CXXFLAGS) -c -o $@ $<
//
// example.txt:
//   after neural/default 106.6 μs
//   after neural/accum 107.0 μs
//   after gmm/default 1.931 ms
//   after gmm/accum 1.780 ms
//   before neural/default 157.1 μs
//   before neural/accum 117.9 μs
//   before gmm/default 2.087 ms
//   before gmm/accum 2.040 ms

#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>


// Print the help text to stderr. `argv0` is the program name as invoked.
static void usage(const char *argv0) {
	std::cerr <<
		"Usage: " << argv0 << " [options] <row> <value>\n"
		"       " << argv0 << " [options] <row> <col> <value>\n"
		"       " << argv0 << " [options] <table> <row> <col> <value>\n"
		"Splits lines on a separator character (-s) and creates tables from the result.\n"
		"Each positional argument is a field index specification (after splitting) that\n"
		"determines which parts of the input lines get used for what purpose.\n"
		"A field index specification is like that of the '-f' flag to cut(1). If multiple\n"
		"fields are specified for a particular purpose, they are joined using the\n"
		"separator character and henceforth treated as a single string. If a table cell\n"
		"has multiple values in the input, the values are appended using the separator\n"
		"character.\n"
		"Row, column and table labels are printed in order of occurrence in the input.\n"
		"UTF-8 is assumed for string-length calculation.\n"
		"\n"
		"Options:\n"
		"  -h        Show help.\n"
		"  -s DELIM  The character that separates fields. Default space (' ').\n";
}

// Inclusive range of zero-based field indices.
struct Range {
	int from, to;
};

// A parsed field index specification: a list of ranges, in the order given.
struct Fieldspec {
	std::vector<Range> ranges;
	bool empty() const { return ranges.empty(); }
};

// Parse a decimal int at the start of `str`.
// Returns the value and a pointer to the first unparsed character.
// Prints a message and exits with status 1 if there are no digits at all or
// the value does not fit in an int.
static std::pair<int, const char*> parse_int(const char *str) {
	errno = 0;
	char *endp = nullptr;
	const long val = std::strtol(str, &endp, 10);
	// endp == str means strtol consumed nothing (covers the empty string too).
	if (endp == str || errno != 0 || static_cast<long>(static_cast<int>(val)) != val) {
		std::cerr << "Invalid number: " << str << std::endl;
		std::exit(1);
	}
	return std::pair<int, const char*>(static_cast<int>(val), endp);
}

// Parse a field spec of the form N[-M][,N[-M]]... with one-based field numbers.
// Exits with status 1 on malformed input or on field numbers below 1.
static Fieldspec parse_fieldspec(const char *str) {
	Fieldspec spec;
	while (true) {
		int first;
		const char *endp;
		std::tie(first, endp) = parse_int(str);

		int last = first;
		if (*endp == '-') std::tie(last, endp) = parse_int(endp + 1);

		// Fields are numbered from 1; anything lower would index before the
		// start of the split line.
		if (first < 1 || last < 1) {
			std::cerr << "Invalid field number in field spec (fields are numbered from 1)" << std::endl;
			std::exit(1);
		}
		spec.ranges.push_back(Range{first - 1, last - 1});

		if (*endp == '\0') break;
		if (*endp != ',') {
			std::cerr << "Invalid character in field spec: '" << *endp << "'" << std::endl;
			std::exit(1);
		}
		str = endp + 1;
	}
	return spec;
}

// Overwrite `dest` with the fields of `parts` selected by `spec`, joined by
// `sepchar`. Indices beyond the end of `parts` are skipped. The separator is
// placed between every pair of emitted fields, also across range boundaries.
static void collect(std::string &dest, const Fieldspec &spec, const std::vector<std::string> &parts, char sepchar) {
	dest.clear();
	const int nparts = static_cast<int>(parts.size());
	bool first = true;
	for (const Range &range : spec.ranges) {
		const int last = std::min(range.to, nparts - 1);
		for (int i = std::max(range.from, 0); i <= last; i++) {
			if (!first) dest += sepchar;
			dest += parts[i];
			first = false;
		}
	}
}

// Stream manipulator: writes `n` spaces (nothing if n <= 0).
struct Spaces {
	const int n;
	Spaces(int n) : n{n} {}
};
static std::ostream& operator<<(std::ostream &os, Spaces spaces) {
	for (int i = 0; i < spaces.n; i++) os << ' ';
	return os;
}

// Display width of `s`, counted as the number of bytes that are not UTF-8
// continuation bytes (10xxxxxx).
static int swidth(const std::string &s) {
	int len = 0;
	for (const char c : s) {
		if ((c & 0xc0) != 0x80) len++;
	}
	return len;
}

enum class Dir { left, center, right };

// Streamable wrapper that pads `s` (whose display width is `sw`) to `outw`
// columns, aligned according to `dir`. `T` is anything streamable, including
// another AlignBase, so alignments can be nested.
template <Dir dir, typename T>
struct AlignBase {
	T s;
	const int outw, sw;
	AlignBase(T s, int outw, int sw) : s{s}, outw{outw}, sw{sw} {}
};
using Left = AlignBase<Dir::left, const std::string&>;
using Center = AlignBase<Dir::center, const std::string&>;
using Right = AlignBase<Dir::right, const std::string&>;

template <Dir dir, typename T>
std::ostream& operator<<(std::ostream &os, const AlignBase<dir, T> &al) {
	const int pad = al.outw - al.sw;
	if constexpr (dir == Dir::left) {
		return os << al.s << Spaces(pad);
	} else if constexpr (dir == Dir::center) {
		// An odd amount of padding puts the extra space on the left.
		return os << Spaces((pad + 1) / 2) << al.s << Spaces(pad / 2);
	} else {
		return os << Spaces(pad) << al.s;
	}
}

// Roles of the four field specs; also indices into the label arrays (TAB..COL).
constexpr int TAB = 0;
constexpr int ROW = 1;
constexpr int COL = 2;
constexpr int VAL = 3;

// Result of command-line parsing.
struct Options {
	char sepchar = ' ';
	std::vector<Fieldspec> fieldspecs;  // positional arguments, in order
};

// Everything read from the input.
struct TableData {
	// Labels in order of first occurrence, indexed by TAB, ROW, COL.
	// Need a box around the std::string to make sure their buffer stays stable
	// even if it does small-string optimisation.
	std::vector<std::unique_ptr<std::string>> labels[3];
	// Cell contents; key: indices in tab, row, col.
	std::map<std::tuple<int, int, int>, std::string> values;
};

// Parse the command line. Exits with status 0 after printing help and with
// status 1 on an invalid option or a bad '-s' argument.
static Options parse_args(int argc, char **argv) {
	Options opts;
	opts.fieldspecs.reserve(4);

	for (int i = 1; i < argc; i++) {
		const char *arg = argv[i];
		if (arg[0] != '-') {
			opts.fieldspecs.push_back(parse_fieldspec(arg));
			continue;
		}
		if (std::strcmp(arg, "--help") == 0) {
			usage(argv[0]);
			std::exit(0);
		}
		for (int j = 1; arg[j] != '\0'; j++) {
			switch (arg[j]) {
				case 'h':
					usage(argv[0]);
					std::exit(0);
				case 's':
					if (arg[j + 1] != '\0') {
						// Attached form: -sX
						opts.sepchar = arg[j + 1];
						j++;
					} else if (i + 1 < argc && argv[i + 1][0] != '\0' && argv[i + 1][1] == '\0') {
						// Detached form: -s X, where X is exactly one byte.
						// Consume the next argument; `arg` still points at the
						// current one, whose scan ends on the next iteration.
						opts.sepchar = argv[i + 1][0];
						i++;
					} else {
						std::cerr << "Argument to '-s' missing or multiple bytes" << std::endl;
						std::exit(1);
					}
					break;
				default:
					std::cerr << "Invalid option '-" << arg[j] << "'" << std::endl;
					std::exit(1);
			}
		}
	}
	return opts;
}

// Split `line` on `sepchar` into `parts` (overwritten). A trailing separator
// does not produce a trailing empty field; an empty line produces no fields.
static void split_fields(const std::string &line, char sepchar, std::vector<std::string> &parts) {
	parts.clear();
	std::size_t cursor = 0;
	while (cursor < line.size()) {
		std::size_t idx = line.find(sepchar, cursor);
		if (idx == std::string::npos) idx = line.size();
		parts.emplace_back(line, cursor, idx - cursor);
		cursor = idx + 1;
	}
}

// Read all lines from `in` and build the label lists and cell map.
// Lines for which any requested spec yields an empty string are skipped.
// When a cell occurs more than once, later values are appended to the earlier
// ones, separated by `sepchar`.
static TableData read_input(std::istream &in, const Fieldspec (&specs)[4], char sepchar) {
	TableData data;

	// string_views refer to the boxed strings in data.labels
	std::unordered_map<std::string_view, int> labels_idx[3];  // only tab, row, col

	std::string line;
	std::vector<std::string> parts;
	std::string texts[4];
	while (std::getline(in, line)) {
		split_fields(line, sepchar, parts);
		if (parts.empty()) continue;  // empty line, no fields

		for (int i = 0; i < 4; i++) collect(texts[i], specs[i], parts, sepchar);

		// check that all parts we need are indeed there
		bool present = true;
		for (int i = 0; i < 4; i++) {
			if (!specs[i].empty() && texts[i].empty()) {
				present = false;
				break;
			}
		}
		if (!present) continue;

		// add to the label lists and collect indices
		int idxs[3];
		for (int i = 0; i < 3; i++) {
			const auto it = labels_idx[i].find(texts[i]);
			if (it != labels_idx[i].end()) {
				idxs[i] = it->second;
				continue;
			}
			idxs[i] = static_cast<int>(data.labels[i].size());
			data.labels[i].push_back(std::make_unique<std::string>(std::move(texts[i])));
			texts[i].clear();
			labels_idx[i].emplace(*data.labels[i].back(), idxs[i]);
		}

		// store the value at the appropriate index triplet; try_emplace leaves
		// texts[VAL] untouched when the cell already exists, so it can be appended
		const auto res = data.values.try_emplace(
			std::make_tuple(idxs[TAB], idxs[ROW], idxs[COL]), std::move(texts[VAL]));
		if (!res.second) {
			res.first->second += sepchar;
			res.first->second += texts[VAL];
		}
	}
	return data;
}

// Print one table per table label to `out`, separated by blank lines.
// The header line (table name plus column labels) is printed only when
// `have_columns` is true. Within a column, values are right-aligned against
// each other and that block is left-aligned under the column label.
static void print_tables(std::ostream &out, const TableData &data, bool have_columns) {
	const auto &tabs = data.labels[TAB];
	const auto &rows = data.labels[ROW];
	const auto &cols = data.labels[COL];
	const int ntabs = static_cast<int>(tabs.size());
	const int nrows = static_cast<int>(rows.size());
	const int ncols = static_cast<int>(cols.size());

	int leftwid = 0;  // does not include table name
	std::vector<int> rowlabwid(rows.size());
	for (int rowi = 0; rowi < nrows; rowi++) {
		rowlabwid[rowi] = swidth(*rows[rowi]);
		leftwid = std::max(leftwid, rowlabwid[rowi]);
	}

	std::vector<int> collabwid(cols.size());
	for (int coli = 0; coli < ncols; coli++)
		collabwid[coli] = swidth(*cols[coli]);

	for (int tabi = 0; tabi < ntabs; tabi++) {
		if (tabi > 0) out << '\n';

		const std::string &tabname = *tabs[tabi];
		const int tabwid = swidth(tabname);
		const int thisleftwid = std::max(leftwid, tabwid);

		// widest value per column, within this table only
		std::vector<int> colvalwid(cols.size(), 0);
		for (int coli = 0; coli < ncols; coli++) {
			for (int rowi = 0; rowi < nrows; rowi++) {
				const auto it = data.values.find(std::make_tuple(tabi, rowi, coli));
				if (it != data.values.end())
					colvalwid[coli] = std::max(colvalwid[coli], swidth(it->second));
			}
		}

		std::vector<int> colwid(cols.size());
		for (int coli = 0; coli < ncols; coli++)
			colwid[coli] = std::max(collabwid[coli], colvalwid[coli]);

		if (have_columns) {
			out << Left(tabname, thisleftwid, tabwid);
			for (int coli = 0; coli < ncols; coli++)
				out << ' ' << Left(*cols[coli], colwid[coli], collabwid[coli]);
			out << '\n';
		}

		for (int rowi = 0; rowi < nrows; rowi++) {
			out << Left(*rows[rowi], thisleftwid, rowlabwid[rowi]);

			for (int coli = 0; coli < ncols; coli++) {
				const auto it = data.values.find(std::make_tuple(tabi, rowi, coli));
				if (it == data.values.end()) {
					out << Spaces(colwid[coli] + 1);
					continue;
				}
				const Right cell(it->second, colvalwid[coli], swidth(it->second));
				out << ' ' << AlignBase<Dir::left, Right>(cell, colwid[coli], colvalwid[coli]);
			}
			out << '\n';
		}
	}
}

// Entry point: parse options, read stdin, print the tables to stdout.
// Returns 1 when the number of field specs is not 2, 3 or 4.
int main(int argc, char **argv) {
	Options opts = parse_args(argc, argv);
	std::vector<Fieldspec> &given = opts.fieldspecs;

	Fieldspec specs[4];
	switch (given.size()) {
		case 2:
			specs[ROW] = std::move(given[0]);
			specs[VAL] = std::move(given[1]);
			break;
		case 3:
			specs[ROW] = std::move(given[0]);
			specs[COL] = std::move(given[1]);
			specs[VAL] = std::move(given[2]);
			break;
		case 4:
			specs[TAB] = std::move(given[0]);
			specs[ROW] = std::move(given[1]);
			specs[COL] = std::move(given[2]);
			specs[VAL] = std::move(given[3]);
			break;
		default:
			std::cerr << "Unexpected number of field specs; expected 2, 3 or 4" << std::endl;
			return 1;
	}

	const TableData data = read_input(std::cin, specs, opts.sepchar);
	print_tables(std::cout, data, !specs[COL].empty());

	std::cout << std::flush;
	return 0;
}