summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Smeding <tom@tomsmeding.com> 2025-10-16 21:49:02 +0200
committer Tom Smeding <tom@tomsmeding.com> 2025-10-16 21:49:02 +0200
commit b006ae324da88e3280914b3d00585a740057d4c8 (patch)
tree ccab466eb83f32a32b951cd8b3b360d0e4b66a81
Initial HEAD master
-rw-r--r-- .gitignore 2
-rw-r--r-- Makefile 22
-rw-r--r-- example.txt 8
-rw-r--r-- tabulate.cpp 302
4 files changed, 334 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1d9613c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+tabulate
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..00953bd
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+# Build script for the tabulate binary.
+# Compiler and base flags; -fwrapv asks GCC to treat signed overflow as wrapping.
+CXX = g++
+CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv
+# A non-empty DEBUG variable (e.g. `make DEBUG=1`) selects debug info; otherwise build with -O2.
+ifneq ($(DEBUG),)
+ CXXFLAGS += -g
+else
+ CXXFLAGS += -O2
+endif
+BIN = tabulate
+
+.PHONY: all clean
+
+# Default target: build the binary.
+all: $(BIN)
+
+# Remove the binary and all object files.
+clean:
+ rm -f $(BIN) *.o
+
+
+# Link the binary from one object file per .cpp file in this directory.
+$(BIN): $(patsubst %.cpp,%.o,$(wildcard *.cpp))
+ $(CXX) -o $@ $^
+
+# Compile a single source file; every object depends on all headers in this directory.
+%.o: %.cpp $(wildcard *.h)
+ $(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/example.txt b/example.txt
new file mode 100644
index 0000000..b2b7e1a
--- /dev/null
+++ b/example.txt
@@ -0,0 +1,8 @@
+after neural/default 106.6 μs
+after neural/accum 107.0 μs
+after gmm/default 1.931 ms
+after gmm/accum 1.780 ms
+before neural/default 157.1 μs
+before neural/accum 117.9 μs
+before gmm/default 2.087 ms
+before gmm/accum 2.040 ms
diff --git a/tabulate.cpp b/tabulate.cpp
new file mode 100644
index 0000000..f5cfb4f
--- /dev/null
+++ b/tabulate.cpp
@@ -0,0 +1,302 @@
+#include <algorithm>
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+
+// Writes the help text to standard error.
+// argv0: the program name, substituted into each of the three synopsis lines.
+static void usage(const char *argv0) {
+    // Everything after the synopsis is fixed text that does not depend on argv0.
+    static const char *const description =
+        "Splits lines on a separator character (-s) and creates tables from the result.\n"
+        "Each positional argument is a field index specification (after splitting) that\n"
+        "determines which parts of the input lines get used for what purpose.\n"
+        "A field index specification is like that of the '-f' flag to cut(1). If multiple\n"
+        "fields are specified for a particular purpose, they are joined using the\n"
+        "separator character and henceforth treated as a single string. If a table cell\n"
+        "has multiple values in the input, the values are appended using the separator\n"
+        "character.\n"
+        "Row, column and table labels are printed in order of occurrence in the input.\n"
+        "UTF-8 is assumed for string-length calculation.\n"
+        "\n"
+        "Options:\n"
+        " -h Show help.\n"
+        " -s DELIM The character that separates fields. Default space (' ').\n";
+
+    // One synopsis line per supported number of positional arguments (2, 3 or 4).
+    std::cerr << "Usage: " << argv0 << " [options] <row> <value>\n";
+    std::cerr << " " << argv0 << " [options] <row> <column> <value>\n";
+    std::cerr << " " << argv0 << " [options] <table> <row> <column> <value>\n";
+    std::cerr << description;
+}
+
+// An inclusive span of field positions. Both ends are zero-based indices
+// into the list of fields that a line was split into.
+struct Range {
+    int from;  // index of the first field in the span
+    int to;    // index of the last field in the span
+};
+
+// A parsed field index specification: the ranges it names, in the order given.
+struct Fieldspec {
+    std::vector<Range> ranges;
+
+    // True when the specification names no ranges at all, which is the state
+    // of a default-constructed Fieldspec.
+    bool empty() const {
+        return ranges.size() == 0;
+    }
+};
+
+// Parses a base-10 integer from the start of `str`.
+//
+// Returns the parsed value together with a pointer to the first character
+// after the number, so the caller can continue scanning from there.
+//
+// Prints a diagnostic to standard error and exits with status 1 when no
+// digits could be parsed at all, when strtol reports an error, or when the
+// value does not fit in an int.
+static std::pair<int, const char*> parse_int(const char *str) {
+    errno = 0;
+    char *endp = nullptr;
+    const long val = std::strtol(str, &endp, 10);
+    // endp == str means strtol consumed nothing: the input is empty or does
+    // not start with a number. Without this check such input would silently
+    // parse as 0.
+    const bool no_digits = (endp == str);
+    const bool fits_int = static_cast<long>(static_cast<int>(val)) == val;
+    if (no_digits || errno != 0 || !fits_int) {
+        std::cerr << "Invalid number: " << str << std::endl;
+        std::exit(1);
+    }
+    return std::make_pair(static_cast<int>(val), static_cast<const char*>(endp));
+}
+
+// Parses a field index specification such as "2", "1,3" or "2-4,7".
+//
+// The specification is a comma-separated list of items, each either a single
+// field number N or an inclusive range N-M. Field numbers are 1-based on the
+// command line and are stored 0-based in the returned Fieldspec.
+//
+// Prints a diagnostic to standard error and exits with status 1 on a
+// malformed specification or on a field number below 1.
+static Fieldspec parse_fieldspec(const char *str) {
+    Fieldspec spec;
+    while (true) {
+        int first;
+        const char *endp;
+        std::tie(first, endp) = parse_int(str);
+
+        // A lone number is a range of length one.
+        int last = first;
+        if (*endp == '-') {
+            std::tie(last, endp) = parse_int(endp + 1);
+        }
+
+        // Fields are numbered from 1. Anything lower would turn into a
+        // negative index once converted to 0-based, so reject it here.
+        if (first < 1 || last < 1) {
+            std::cerr << "Invalid field number in field spec: " << (first < 1 ? first : last)
+                      << " (fields are numbered from 1)" << std::endl;
+            exit(1);
+        }
+        spec.ranges.push_back(Range{first - 1, last - 1});
+
+        if (*endp == '\0') break;
+        if (*endp != ',') {
+            std::cerr << "Invalid character in field spec: '" << *endp << "'" << std::endl;
+            exit(1);
+        }
+        str = endp + 1;  // continue with the item after the comma
+    }
+    return spec;
+}
+
+// Builds the text selected by `spec` from the fields of one input line.
+//
+// dest:    receives the result; its previous contents are discarded.
+// spec:    the ranges of (0-based) field indices to select.
+// parts:   the fields the line was split into.
+// sepchar: placed between every two selected fields, both within a range
+//          and across ranges.
+//
+// Indices outside `parts` are skipped, so `dest` ends up empty when none of
+// the requested fields exist on this line.
+static void collect(std::string &dest, const Fieldspec &spec, const std::vector<std::string> &parts, char sepchar) {
+    dest.clear();
+    const int nparts = static_cast<int>(parts.size());
+    // Tracked with a flag rather than dest.empty(), because a selected field
+    // may itself be the empty string.
+    bool first = true;
+    for (const Range &range : spec.ranges) {
+        // Clamp both ends so that no index outside `parts` is ever read.
+        const int begin = std::max(range.from, 0);
+        const int end = std::min(range.to, nparts - 1);
+        for (int i = begin; i <= end; i++) {
+            if (!first) dest += sepchar;
+            dest += parts[i];
+            first = false;
+        }
+    }
+}
+
+// Stream helper: inserting a Spaces(n) into an ostream writes n space
+// characters. A count of zero or less writes nothing.
+struct Spaces {
+    const int n;
+    Spaces(int count) : n{count} {}
+};
+
+// Writes spaces.n space characters to os, one at a time, and returns os.
+static std::ostream& operator<<(std::ostream &os, Spaces spaces) {
+    for (int remaining = spaces.n; remaining > 0; --remaining) {
+        os << ' ';
+    }
+    return os;
+}
+
+// Returns the display length of s, assuming it is UTF-8: every byte is
+// counted except continuation bytes (those of the form 10xxxxxx).
+static int swidth(const std::string &s) {
+    int count = 0;
+    for (std::size_t pos = 0; pos < s.size(); ++pos) {
+        const bool continuation = (s[pos] & 0xc0) == 0x80;
+        if (!continuation) {
+            ++count;
+        }
+    }
+    return count;
+}
+
+// Side on which padding is placed when content is aligned within a column.
+enum class Dir {
+    left,
+    center,
+    right,
+};
+
+// Pairs some printable content with the width it should occupy when written
+// to a stream. The alignment direction is part of the type, which selects
+// the matching stream insertion operator.
+//
+// s:    the content to print.
+// outw: the total width to fill.
+// sw:   the width that the content itself takes up.
+template <typename T, Dir dir>
+struct AlignBase {
+    T s;
+    const int outw;
+    const int sw;
+
+    AlignBase(T content, int out_width, int content_width)
+        : s{content}, outw{out_width}, sw{content_width} {}
+};
+
+// Alignment wrappers that refer to a std::string without copying it.
+using Left = AlignBase<const std::string&, Dir::left>;
+using Center = AlignBase<const std::string&, Dir::center>;
+using Right = AlignBase<const std::string&, Dir::right>;
+
+// Left-aligned output: the content first, then the padding.
+template <typename T>
+std::ostream& operator<<(std::ostream &os, AlignBase<T, Dir::left> al) {
+    const int padding = al.outw - al.sw;
+    os << al.s;
+    os << Spaces(padding);
+    return os;
+}
+
+// Centred output: the padding is split over both sides, and the left side
+// gets the extra space when the padding is odd.
+template <typename T>
+std::ostream& operator<<(std::ostream &os, AlignBase<T, Dir::center> al) {
+    const int padding = al.outw - al.sw;
+    const int before = (padding + 1) / 2;
+    const int after = padding / 2;
+    os << Spaces(before);
+    os << al.s;
+    os << Spaces(after);
+    return os;
+}
+
+// Right-aligned output: the padding first, then the content.
+template <typename T>
+std::ostream& operator<<(std::ostream &os, AlignBase<T, Dir::right> al) {
+    const int padding = al.outw - al.sw;
+    os << Spaces(padding);
+    os << al.s;
+    return os;
+}
+
+// Entry point. Parses the options and the 2, 3 or 4 field specifications,
+// reads lines from standard input, splits each on the separator character,
+// and prints one table per distinct table label to standard output.
+//
+// Returns 0 on success (and after printing help), 1 on a usage error.
+int main(int argc, char **argv) {
+    char sepchar = ' ';
+    std::vector<Fieldspec> fieldspecs;
+    fieldspecs.reserve(4);
+
+    for (int i = 1; i < argc; i++) {
+        // Keep a pointer to this argument: the '-s' case below may advance i
+        // while the option letters of this argument are still being scanned.
+        const char *const arg = argv[i];
+        if (arg[0] != '-') {
+            fieldspecs.push_back(parse_fieldspec(arg));
+        } else if (strcmp(arg, "--help") == 0) {
+            usage(argv[0]);
+            return 0;
+        } else {
+            for (int j = 1; arg[j]; j++) {
+                switch (arg[j]) {
+                    case 'h': usage(argv[0]); return 0;
+                    case 's':
+                        if (arg[j+1] != '\0') {
+                            // Attached form: -sX
+                            sepchar = arg[j+1];
+                            j++;
+                        } else if (i + 1 < argc && argv[i+1][0] != '\0' && argv[i+1][1] == '\0') {
+                            // Detached form: -s X. The next argument must be
+                            // exactly one byte; consume it so that it is not
+                            // parsed again as a field spec or option.
+                            sepchar = argv[i+1][0];
+                            i++;
+                        } else {
+                            std::cerr << "Argument to '-s' missing or multiple bytes" << std::endl;
+                            return 1;
+                        }
+                        break;
+                    default:
+                        std::cerr << "Invalid option '-" << arg[j] << "'" << std::endl;
+                        return 1;
+                }
+            }
+        }
+    }
+
+    // Positions of the four purposes in specs[] and texts[]. The first three
+    // are also the positions in labels[], labels_idx[] and idxs[].
+    constexpr int TAB = 0;
+    constexpr int ROW = 1;
+    constexpr int COL = 2;
+    constexpr int VAL = 3;
+
+    // Purposes that were not given on the command line keep an empty spec.
+    Fieldspec specs[4];
+    switch (fieldspecs.size()) {
+        case 2:
+            specs[ROW] = std::move(fieldspecs[0]);
+            specs[VAL] = std::move(fieldspecs[1]);
+            break;
+        case 3:
+            specs[ROW] = std::move(fieldspecs[0]);
+            specs[COL] = std::move(fieldspecs[1]);
+            specs[VAL] = std::move(fieldspecs[2]);
+            break;
+        case 4:
+            specs[TAB] = std::move(fieldspecs[0]);
+            specs[ROW] = std::move(fieldspecs[1]);
+            specs[COL] = std::move(fieldspecs[2]);
+            specs[VAL] = std::move(fieldspecs[3]);
+            break;
+        default:
+            std::cerr << "Unexpected number of field specs; expected 2, 3 or 4" << std::endl;
+            return 1;
+    }
+
+    // Need a box around the std::string to make sure their buffer stays stable
+    // even if it does small-string optimisation
+    std::vector<std::unique_ptr<std::string>> labels[3];
+    std::map<std::tuple<int, int, int>, std::string> values;  // key: indices in tab, row, col
+
+    {
+        // string_views refer to the strings in labels
+        std::unordered_map<std::string_view, int> labels_idx[3];  // only tab, row, col
+
+        std::string line;
+        std::vector<std::string> parts;
+        std::string texts[4];
+        while (std::getline(std::cin, line)) {
+            // split the line into fields
+            parts.clear();
+            size_t cursor = 0;
+            while (cursor < line.size()) {
+                size_t idx = line.find(sepchar, cursor);
+                if (idx == std::string::npos) idx = line.size();
+                parts.emplace_back(line, cursor, idx - cursor);
+                cursor = idx + 1;
+            }
+            if (parts.size() == 0) continue;  // empty line, no fields
+
+            for (int i = 0; i < 4; i++) collect(texts[i], specs[i], parts, sepchar);
+
+            // check that all parts we need are indeed there
+            bool present = true;
+            for (int i = 0; i < 4; i++) {
+                if (!specs[i].empty() && texts[i].empty()) {
+                    present = false;
+                    break;
+                }
+            }
+            if (!present) continue;
+
+            // add to the label lists and collect indices
+            int idxs[3];
+            for (int i = 0; i < 3; i++) {
+                auto it = labels_idx[i].find(texts[i]);
+                if (it == labels_idx[i].end()) {
+                    idxs[i] = labels[i].size();
+                    labels[i].push_back(std::make_unique<std::string>(std::move(texts[i])));
+                    texts[i].clear();
+                    labels_idx[i].emplace(*labels[i].back(), idxs[i]);
+                } else {
+                    idxs[i] = it->second;
+                }
+            }
+
+            // store the value at the appropriate index triplet; a cell that
+            // already has a value gets the new one appended after a separator
+            // (try_emplace leaves texts[VAL] untouched when the key exists)
+            auto [cell, inserted] = values.try_emplace(
+                std::make_tuple(idxs[TAB], idxs[ROW], idxs[COL]), std::move(texts[VAL]));
+            if (!inserted) {
+                cell->second += sepchar;
+                cell->second += texts[VAL];
+            }
+        }
+    }
+
+    int leftwid = 0;  // does not include table name
+    for (const std::unique_ptr<std::string> &rowname : labels[ROW])
+        leftwid = std::max<int>(leftwid, swidth(*rowname));
+
+    std::vector<int> collabwid(labels[COL].size());
+    for (int coli = 0; coli < (int)labels[COL].size(); coli++)
+        collabwid[coli] = swidth(*labels[COL][coli]);
+
+    std::vector<int> rowlabwid(labels[ROW].size());
+    for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++)
+        rowlabwid[rowi] = swidth(*labels[ROW][rowi]);
+
+    for (int tabi = 0; tabi < (int)labels[TAB].size(); tabi++) {
+        if (tabi > 0) std::cout << '\n';  // blank line between tables
+
+        const std::string &tabname = *labels[TAB][tabi];
+        const int thisleftwid = std::max<int>(leftwid, swidth(tabname));
+
+        // widest value per column, within this table only
+        std::vector<int> colvalwid(labels[COL].size());
+        for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
+            for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++) {
+                auto it = values.find(std::make_tuple(tabi, rowi, coli));
+                if (it != values.end())
+                    colvalwid[coli] = std::max<int>(colvalwid[coli], swidth(it->second));
+            }
+        }
+
+        // a column is as wide as the wider of its label and its values
+        std::vector<int> colwid(labels[COL].size());
+        for (int coli = 0; coli < (int)labels[COL].size(); coli++)
+            colwid[coli] = std::max(collabwid[coli], colvalwid[coli]);
+
+        // the header line is only printed when a column spec was given
+        if (!specs[COL].empty()) {
+            std::cout << Left(tabname, thisleftwid, swidth(tabname));
+            for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
+                std::cout << ' ' << Left(*labels[COL][coli], colwid[coli], collabwid[coli]);
+            }
+            std::cout << '\n';
+        }
+
+        for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++) {
+            std::cout << Left(*labels[ROW][rowi], thisleftwid, rowlabwid[rowi]);
+
+            for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
+                auto it = values.find(std::make_tuple(tabi, rowi, coli));
+                if (it != values.end()) {
+                    // values are right-aligned among themselves, and that
+                    // block of values is centred within the column
+                    std::cout << ' ' << AlignBase<Right, Dir::center>(Right(it->second, colvalwid[coli], swidth(it->second)), colwid[coli], colvalwid[coli]);
+                } else {
+                    std::cout << Spaces(colwid[coli] + 1);
+                }
+            }
+            std::cout << '\n';
+        }
+    }
+
+    std::cout << std::flush;
+}