From b006ae324da88e3280914b3d00585a740057d4c8 Mon Sep 17 00:00:00 2001
From: Tom Smeding <tom@tomsmeding.com>
Date: Thu, 16 Oct 2025 21:49:02 +0200
Subject: Initial

---
 .gitignore   |   2 +
 Makefile     |  22 +++++
 example.txt  |   8 ++
 tabulate.cpp | 302 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 334 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 example.txt
 create mode 100644 tabulate.cpp
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1d9613c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+tabulate
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..00953bd
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+CXX = g++
+CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv
+ifneq ($(DEBUG),)  # any non-empty DEBUG (e.g. `make DEBUG=1`) selects -g instead of -O2
+	CXXFLAGS += -g
+else
+	CXXFLAGS += -O2
+endif
+BIN = tabulate
+
+.PHONY: all clean
+
+all: $(BIN)
+
+clean:
+	rm -f $(BIN) *.o
+
+
+$(BIN): $(patsubst %.cpp,%.o,$(wildcard *.cpp))  # one object per .cpp file in this directory
+	$(CXX) -o $@ $^
+
+%.o: %.cpp $(wildcard *.h)  # every object depends on every header in this directory
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/example.txt b/example.txt
new file mode 100644
index 0000000..b2b7e1a
--- /dev/null
+++ b/example.txt
@@ -0,0 +1,8 @@
+after neural/default 106.6 μs
+after neural/accum 107.0 μs
+after gmm/default 1.931 ms
+after gmm/accum 1.780 ms
+before neural/default 157.1 μs
+before neural/accum 117.9 μs
+before gmm/default 2.087 ms
+before gmm/accum 2.040 ms
diff --git a/tabulate.cpp b/tabulate.cpp
new file mode 100644
index 0000000..f5cfb4f
--- /dev/null
+++ b/tabulate.cpp
@@ -0,0 +1,302 @@
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <unordered_map>
+#include <map>
+#include <tuple>
+#include <memory>
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+
+static void usage(const char *argv0) {  // writes the help text to stderr; argv0 is shown as the program name
+  std::cerr <<
+    "Usage: " << argv0 << " [options] <row> <value>\n"
+    "       " << argv0 << " [options] <row> <column> <value>\n"
+    "       " << argv0 << " [options] <table> <row> <column> <value>\n"
+    "Splits lines on a separator character (-s) and creates tables from the result.\n"
+    "Each positional argument is a field index specification (after splitting) that\n"
+    "determines which parts of the input lines get used for what purpose.\n"
+    "A field index specification is like that of the '-f' flag to cut(1). If multiple\n"
+    "fields are specified for a particular purpose, they are joined using the\n"
+    "separator character and henceforth treated as a single string. If a table cell\n"
+    "has multiple values in the input, the values are appended using the separator\n"
+    "character.\n"
+    "Row, column and table labels are printed in order of occurrence in the input.\n"
+    "UTF-8 is assumed for string-length calculation.\n"
+    "\n"
+    "Options:\n"
+    "  -h        Show help.\n"
+    "  -s DELIM  The character that separates fields. Default space (' ').\n";
+}
+
+struct Range {  // inclusive span of zero-based field indices
+  int from, to;
+};
+
+struct Fieldspec {  // the ranges parsed from one positional argument, in the order written
+  std::vector<Range> ranges;
+  bool empty() const { return ranges.empty(); }  // true when no spec was given for this purpose
+};
+
+// Parses a decimal integer at the start of str; returns it with a pointer to the first
+// unparsed character. No digits, overflow, or a value outside int range: error + exit(1).
+static std::pair<int, const char*> parse_int(const char *str) {
+  char *endp = nullptr;
+  errno = 0;
+  const long val = strtol(str, &endp, 10);
+  const bool ok = endp != str && errno == 0 && (long)(int)val == val;  // endp == str: no digits
+  if (!ok) { std::cerr << "Invalid number: " << str << std::endl; exit(1); }
+  return {static_cast<int>(val), endp};
+}
+
+// Parses a comma-separated list of N or N-M items (e.g. "1,3-5") into zero-based,
+// inclusive ranges. Field numbers are 1-based on the command line; a number below 1
+// or an unexpected character is reported on stderr and ends the program with status 1.
+static Fieldspec parse_fieldspec(const char *str) {
+  Fieldspec spec;
+  while (true) {
+    auto [first, endp] = parse_int(str);
+    int last = first;
+    if (*endp == '-') std::tie(last, endp) = parse_int(endp + 1);
+    if (first < 1 || last < 1) {  // would become a negative index into the split fields
+      std::cerr << "Invalid field index in field spec: fields are numbered from 1" << std::endl;
+      exit(1);
+    }
+    spec.ranges.push_back(Range{first - 1, last - 1});
+    if (*endp == '\0') return spec;
+    if (*endp != ',') {
+      std::cerr << "Invalid character in field spec: '" << *endp << "'" << std::endl;
+      exit(1);
+    }
+    str = endp + 1;
+  }
+}
+
+// Overwrites dest with the fields of parts that spec selects, joined by sepchar
+// (between ranges too); field indices that fall outside parts are skipped.
+static void collect(std::string &dest, const Fieldspec &spec, const std::vector<std::string> &parts, char sepchar) {
+  dest.clear();
+  bool any = false;  // "a field was appended"; dest.empty() would misfire on empty fields
+  for (const Range &range : spec.ranges)
+    for (int i = std::max(range.from, 0); i <= range.to && i < (int)parts.size(); i++, any = true)
+      dest.append(any ? 1 : 0, sepchar).append(parts[i]);  // separator before all but the first
+}
+
+struct Spaces {  // `os << Spaces(k)` writes k space characters; k <= 0 writes nothing
+  const int n;
+  Spaces(int count) : n{count} {}
+};
+static std::ostream& operator<<(std::ostream &os, Spaces spaces) {
+  for (int left = spaces.n; left > 0; --left) os << ' ';
+  return os;
+}
+
+static int swidth(const std::string &s) {  // length of s in UTF-8 code points
+  int count = 0;
+  for (const unsigned char byte : s) count += (byte >> 6) != 2;  // skip 10xxxxxx continuation bytes
+  return count;
+}
+
+enum class Dir { left, center, right };
+
+// A printable value s whose display width sw is already known, to be padded with
+// spaces to outw columns. T is a string reference or another AlignBase, so that
+// alignments can be nested; dir selects the side(s) that receive the padding.
+template <typename T, Dir dir>
+struct AlignBase {
+  T s;
+  const int outw, sw;
+  AlignBase(T s, int outw, int sw) : s{s}, outw{outw}, sw{sw} {}
+};
+// Shorthands for aligning a std::string, which is held by reference and not copied.
+using Left = AlignBase<const std::string&, Dir::left>;
+using Center = AlignBase<const std::string&, Dir::center>;
+using Right = AlignBase<const std::string&, Dir::right>;
+
+// Writes al.s together with (outw - sw) spaces of padding: after the value for
+// Dir::left, before it for Dir::right, and split over both sides for Dir::center,
+// where an odd leftover space goes in front. When sw >= outw no padding is
+// written at all.
+template <typename T, Dir dir>
+std::ostream& operator<<(std::ostream &os, AlignBase<T, dir> al) {
+  const int pad = al.outw - al.sw;
+  if constexpr (dir == Dir::left) return os << al.s << Spaces(pad);
+  else if constexpr (dir == Dir::right) return os << Spaces(pad) << al.s;
+  else return os << Spaces((pad + 1) / 2) << al.s << Spaces(pad / 2);
+}
+
+// Parses options and field specs, reads stdin line by line into (table, row, column)
+// cells, then prints one aligned table per table label to stdout. Returns 1 on a
+// command-line error detected here; falls off the end (status 0) otherwise.
+int main(int argc, char **argv) {
+  char sepchar = ' ';
+  std::vector<Fieldspec> fieldspecs;
+  fieldspecs.reserve(4);
+
+  // arguments that do not start with '-' are field specs, the others are option clusters
+  for (int i = 1; i < argc; i++) {
+    if (argv[i][0] != '-') {
+      fieldspecs.push_back(parse_fieldspec(argv[i]));
+    } else if (strcmp(argv[i], "--help") == 0) {
+      usage(argv[0]); return 0;
+    } else {
+      for (int j = 1; argv[i][j]; j++) {
+        switch (argv[i][j]) {
+          case 'h': usage(argv[0]); return 0;
+          case 's':
+            if (argv[i][j+1] != '\0') {  // attached form: -sX
+              sepchar = argv[i][j+1];
+              j++;
+            } else if (i + 1 < argc && argv[i+1][0] != '\0' && argv[i+1][1] == '\0') {
+              sepchar = argv[++i][0];  // detached form: -s X; the next argument is consumed
+              j = 0;  // argv[i] is now that one-byte argument, so the loop's j++ lands on its '\0'
+            } else {
+              std::cerr << "Argument to '-s' missing or multiple bytes" << std::endl;
+              return 1;
+            }
+            break;
+          default:
+            std::cerr << "Invalid option '-" << argv[i][j] << "'" << std::endl;
+            return 1;
+        }
+      }
+    }
+  }
+
+  enum { TAB, ROW, COL, VAL };  // slots in specs/texts; labels and idxs only use the first three
+
+  Fieldspec specs[4];
+  switch (fieldspecs.size()) {
+    case 2:
+      specs[ROW] = std::move(fieldspecs[0]);
+      specs[VAL] = std::move(fieldspecs[1]);
+      break;
+    case 3:
+      specs[ROW] = std::move(fieldspecs[0]);
+      specs[COL] = std::move(fieldspecs[1]);
+      specs[VAL] = std::move(fieldspecs[2]);
+      break;
+    case 4:
+      specs[TAB] = std::move(fieldspecs[0]);
+      specs[ROW] = std::move(fieldspecs[1]);
+      specs[COL] = std::move(fieldspecs[2]);
+      specs[VAL] = std::move(fieldspecs[3]);
+      break;
+    default:
+      std::cerr << "Unexpected number of field specs; expected 2, 3 or 4" << std::endl;
+      return 1;
+  }
+
+  // Need a box around the std::string to make sure their buffer stays stable
+  // even if it does small-string optimisation
+  std::vector<std::unique_ptr<std::string>> labels[3];
+  std::map<std::tuple<int, int, int>, std::string> values;  // key: indices in tab, row, col
+
+  {
+    // string_views refer to the strings in labels
+    std::unordered_map<std::string_view, int> labels_idx[3];  // only tab, row, col
+
+    std::string line;
+    std::vector<std::string> parts;
+    std::string texts[4];
+    while (std::getline(std::cin, line)) {
+      parts.clear();
+      // split on sepchar; adjacent separators give empty fields, a trailing one adds none
+      size_t cursor = 0;
+      while (cursor < line.size()) {
+        size_t idx = line.find(sepchar, cursor);
+        if (idx == std::string::npos) idx = line.size();
+        parts.emplace_back(line, cursor, idx - cursor);
+        cursor = idx + 1;
+      }
+      if (parts.size() == 0) continue;  // empty line, no fields
+
+      for (int i = 0; i < 4; i++) collect(texts[i], specs[i], parts, sepchar);
+
+      // check that all parts we need are indeed there
+      bool present = true;
+      for (int i = 0; i < 4; i++)
+        if (!specs[i].empty() && texts[i].empty()) present = false;
+      if (!present) continue;
+
+      // add to the label lists and collect indices
+      int idxs[3];
+      for (int i = 0; i < 3; i++) {
+        auto it = labels_idx[i].find(texts[i]);
+        if (it == labels_idx[i].end()) {
+          idxs[i] = labels[i].size();
+          labels[i].push_back(std::make_unique<std::string>(std::move(texts[i])));
+          texts[i].clear();
+          labels_idx[i].emplace(*labels[i].back(), idxs[i]);
+        } else {
+          idxs[i] = it->second;
+        }
+      }
+
+      // store the value at the appropriate index triplet; a cell seen before gets the new
+      // value appended after a separator (try_emplace leaves texts[VAL] intact in that case)
+      const auto [cell, inserted] = values.try_emplace(std::make_tuple(idxs[TAB], idxs[ROW], idxs[COL]), std::move(texts[VAL]));
+      if (!inserted) cell->second.append(1, sepchar).append(texts[VAL]);
+    }
+  }
+
+  int leftwid = 0;  // does not include table name
+  for (const std::unique_ptr<std::string> &rowname : labels[ROW])
+    leftwid = std::max<int>(leftwid, swidth(*rowname));
+
+  std::vector<int> collabwid(labels[COL].size());
+  for (int coli = 0; coli < (int)labels[COL].size(); coli++)
+    collabwid[coli] = swidth(*labels[COL][coli]);
+
+  std::vector<int> rowlabwid(labels[ROW].size());
+  for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++)
+    rowlabwid[rowi] = swidth(*labels[ROW][rowi]);
+
+  for (int tabi = 0; tabi < (int)labels[TAB].size(); tabi++) {
+    if (tabi > 0) std::cout << '\n';
+
+    const std::string &tabname = *labels[TAB][tabi];
+    const int thisleftwid = std::max<int>(leftwid, swidth(tabname));
+
+    std::vector<int> colvalwid(labels[COL].size());  // widest value per column in this table
+    for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
+      for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++) {
+        auto it = values.find(std::make_tuple(tabi, rowi, coli));
+        if (it != values.end())
+          colvalwid[coli] = std::max<int>(colvalwid[coli], swidth(it->second));
+      }
+    }
+
+    std::vector<int> colwid(labels[COL].size());
+    for (int coli = 0; coli < (int)labels[COL].size(); coli++)
+      colwid[coli] = std::max(collabwid[coli], colvalwid[coli]);
+
+    if (!specs[COL].empty()) {  // header row: table name, then the column labels
+      std::cout << Left(tabname, thisleftwid, swidth(tabname));
+      for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
+        std::cout << ' ' << Left(*labels[COL][coli], colwid[coli], collabwid[coli]);
+      }
+      std::cout << '\n';
+    }
+
+    for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++) {
+      std::cout << Left(*labels[ROW][rowi], thisleftwid, rowlabwid[rowi]);
+
+      for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
+        auto it = values.find(std::make_tuple(tabi, rowi, coli));
+        if (it != values.end()) {  // right-align to the widest value, then center that in the column
+          std::cout << ' ' << AlignBase<Right, Dir::center>(Right(it->second, colvalwid[coli], swidth(it->second)), colwid[coli], colvalwid[coli]);
+        } else {
+          std::cout << Spaces(colwid[coli] + 1);
+        }
+      }
+      std::cout << '\n';
+    }
+  }
+
+  std::cout << std::flush;
+}
-- 
cgit v1.3.1