diff options
| author | Tom Smeding <tom@tomsmeding.com> | 2025-10-16 21:49:02 +0200 | 
|---|---|---|
| committer | Tom Smeding <tom@tomsmeding.com> | 2025-10-16 21:49:02 +0200 | 
| commit | b006ae324da88e3280914b3d00585a740057d4c8 (patch) | |
| tree | ccab466eb83f32a32b951cd8b3b360d0e4b66a81 | |
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | Makefile | 22 | ||||
| -rw-r--r-- | example.txt | 8 | ||||
| -rw-r--r-- | tabulate.cpp | 302 | 
4 files changed, 334 insertions, 0 deletions
// ---------------------------------------------------------------------------
// Companion files added by the same commit (unified diff, kept verbatim):
//
// diff --git a/.gitignore b/.gitignore
// new file mode 100644
// index 0000000..1d9613c
// --- /dev/null
// +++ b/.gitignore
// @@ -0,0 +1,2 @@
// +*.o
// +tabulate
//
// diff --git a/Makefile b/Makefile
// new file mode 100644
// index 0000000..00953bd
// --- /dev/null
// +++ b/Makefile
// @@ -0,0 +1,22 @@
// +CXX = g++
// +CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv
// +ifneq ($(DEBUG),)
// +	CXXFLAGS += -g
// +else
// +	CXXFLAGS += -O2
// +endif
// +BIN = tabulate
// +
// +.PHONY: all clean
// +
// +all: $(BIN)
// +
// +clean:
// +	rm -f $(BIN) *.o
// +
// +
// +$(BIN): $(patsubst %.cpp,%.o,$(wildcard *.cpp))
// +	$(CXX) -o $@ $^
// +
// +%.o: %.cpp $(wildcard *.h)
// +	$(CXX) $(CXXFLAGS) -c -o $@ $<
//
// diff --git a/example.txt b/example.txt
// new file mode 100644
// index 0000000..b2b7e1a
// --- /dev/null
// +++ b/example.txt
// @@ -0,0 +1,8 @@
// +after neural/default 106.6 μs
// +after neural/accum 107.0 μs
// +after gmm/default 1.931 ms
// +after gmm/accum 1.780 ms
// +before neural/default 157.1 μs
// +before neural/accum 117.9 μs
// +before gmm/default 2.087 ms
// +before gmm/accum 2.040 ms
//
// diff --git a/tabulate.cpp b/tabulate.cpp
// new file mode 100644
// index 0000000..f5cfb4f
// --- /dev/null
// +++ b/tabulate.cpp
// @@ -0,0 +1,302 @@
//
// What follows is the tabulate.cpp payload of that diff, un-collapsed and
// with the defects listed in the comments below corrected.
// ---------------------------------------------------------------------------

#include <algorithm>
#include <iostream>
#include <vector>
#include <string>
#include <string_view>
#include <unordered_set>
#include <unordered_map>
#include <map>
#include <tuple>
#include <memory>
#include <utility>
#include <cstdlib>
#include <cstring>
#include <cerrno>


// Prints the command-line help text to stderr. argv0 is the program name to
// show in the synopsis lines.
static void usage(const char *argv0) {
  std::cerr <<
    "Usage: " << argv0 << " [options] <row> <value>\n"
    "       " << argv0 << " [options] <row> <column> <value>\n"
    "       " << argv0 << " [options] <table> <row> <column> <value>\n"
    "Splits lines on a separator character (-s) and creates tables from the result.\n"
    "Each positional argument is a field index specification (after splitting) that\n"
    "determines which parts of the input lines get used for what purpose.\n"
    "A field index specification is like that of the '-f' flag to cut(1). If multiple\n"
    "fields are specified for a particular purpose, they are joined using the\n"
    "separator character and henceforth treated as a single string. If a table cell\n"
    "has multiple values in the input, the values are appended using the separator\n"
    "character.\n"
    "Row, column and table labels are printed in order of occurrence in the input.\n"
    "UTF-8 is assumed for string-length calculation.\n"
    "\n"
    "Options:\n"
    "  -h        Show help.\n"
    "  -s DELIM  The character that separates fields. Default space (' ').\n";
}

// Inclusive range of zero-based field indices.
struct Range {
  int from, to;
};

// A parsed field specification: the list of ranges given on the command line,
// in the order they were written.
struct Fieldspec {
  std::vector<Range> ranges;
  bool empty() const { return ranges.empty(); }
};

// Parses a decimal integer at the start of str.
// Returns the value and a pointer to the first character after the number.
// Prints a message and exits with status 1 if no digits could be consumed or
// if the value does not fit in an int.
static std::pair<int, const char*> parse_int(const char *str) {
  errno = 0;
  char *endp = nullptr;
  const long val = std::strtol(str, &endp, 10);
  // endp == str means strtol consumed nothing (this also covers the empty string).
  const bool no_digits = (endp == str);
  const bool out_of_range = errno != 0 || static_cast<long>(static_cast<int>(val)) != val;
  if (no_digits || out_of_range) {
    std::cerr << "Invalid number: " << str << std::endl;
    std::exit(1);
  }
  return std::pair<int, const char*>(static_cast<int>(val), endp);
}

// Parses a comma-separated list of one-based field numbers "N" or ranges
// "N-M" into a Fieldspec holding zero-based inclusive ranges.
// Exits with status 1 on a syntax error or on a field number below 1.
// A decreasing range (M < N) is accepted and simply selects no fields.
static Fieldspec parse_fieldspec(const char *str) {
  Fieldspec spec;
  while (true) {
    int first;
    const char *endp;
    std::tie(first, endp) = parse_int(str);
    int last = first;
    if (*endp == '-') {
      std::tie(last, endp) = parse_int(endp + 1);
    }
    // Field numbers are one-based; anything below 1 would turn into a
    // negative index into the vector of fields.
    if (first < 1 || last < 1) {
      std::cerr << "Invalid field number (fields are numbered from 1): "
                << (first < 1 ? first : last) << std::endl;
      std::exit(1);
    }
    spec.ranges.push_back(Range{first - 1, last - 1});

    if (*endp == '\0') break;
    if (*endp == ',') {
      str = endp + 1;
    } else {
      std::cerr << "Invalid character in field spec: '" << *endp << "'" << std::endl;
      std::exit(1);
    }
  }
  return spec;
}

// Overwrites dest with the fields of parts selected by spec, joined with
// sepchar. Indices beyond the end of parts are skipped, so dest ends up empty
// when none of the requested fields exist on this line. The separator is
// placed between every pair of collected fields, including across ranges.
static void collect(std::string &dest, const Fieldspec &spec, const std::vector<std::string> &parts, char sepchar) {
  dest.clear();
  const int nparts = static_cast<int>(parts.size());
  bool first = true;
  for (const Range &range : spec.ranges) {
    const int last = std::min(range.to, nparts - 1);
    // The lower clamp is purely defensive; parse_fieldspec rejects indices < 0.
    for (int i = std::max(range.from, 0); i <= last; i++) {
      if (!first) dest += sepchar;
      dest += parts[i];
      first = false;
    }
  }
}

// Stream manipulator that writes n space characters (nothing if n <= 0).
struct Spaces {
  const int n;
  explicit Spaces(int n) : n{n} {}
};
static std::ostream& operator<<(std::ostream &os, Spaces spaces) {
  for (int i = 0; i < spaces.n; i++) os << ' ';
  return os;
}

// Display width of s, computed as the number of bytes that are not UTF-8
// continuation bytes (10xxxxxx), i.e. the number of code points.
static int swidth(const std::string &s) {
  int len = 0;
  for (char c : s) len += (c & 0xc0) != 0x80;
  return len;
}

enum class Dir { left, center, right };

// Wraps a printable value s whose display width is sw so that streaming it
// pads the output to outw columns, aligned according to dir. T may itself be
// an AlignBase, which allows nesting alignments.
template <typename T, Dir dir>
struct AlignBase {
  T s;
  const int outw, sw;
  AlignBase(T s, int outw, int sw) : s{s}, outw{outw}, sw{sw} {}
};
using Left = AlignBase<const std::string&, Dir::left>;
using Center = AlignBase<const std::string&, Dir::center>;
using Right = AlignBase<const std::string&, Dir::right>;

template <typename T>
std::ostream& operator<<(std::ostream &os, AlignBase<T, Dir::left> al) {
  return os << al.s << Spaces(al.outw - al.sw);
}

template <typename T>
std::ostream& operator<<(std::ostream &os, AlignBase<T, Dir::right> al) {
  return os << Spaces(al.outw - al.sw) << al.s;
}

template <typename T>
std::ostream& operator<<(std::ostream &os, AlignBase<T, Dir::center> al) {
  const int n = al.outw - al.sw;
  // When the padding is odd, the extra space goes on the left.
  return os << Spaces((n + 1) / 2) << al.s << Spaces(n / 2);
}

// Indices into the per-purpose arrays (field specs, texts, labels).
constexpr int TAB = 0;
constexpr int ROW = 1;
constexpr int COL = 2;
constexpr int VAL = 3;

// Reads lines from stdin, splits them on the separator character, and prints
// one table per distinct table label to stdout. See usage() for the
// command-line interface. Returns 0 on success and 1 on a usage error.
int main(int argc, char **argv) {
  char sepchar = ' ';
  std::vector<Fieldspec> fieldspecs;
  fieldspecs.reserve(4);

  for (int i = 1; i < argc; i++) {
    const char *arg = argv[i];
    if (arg[0] != '-') {
      fieldspecs.push_back(parse_fieldspec(arg));
      continue;
    }
    if (std::strcmp(arg, "--help") == 0) {
      usage(argv[0]);
      return 0;
    }

    // Set when '-s' took its delimiter from the following argument, which
    // must then be skipped instead of being parsed as a field spec.
    bool consumed_next = false;
    for (int j = 1; arg[j]; j++) {
      switch (arg[j]) {
        case 'h': usage(argv[0]); return 0;
        case 's':
          if (arg[j+1] != '\0') {
            // Delimiter attached to the flag: "-s,".
            sepchar = arg[j+1];
            j++;
          } else if (i + 1 < argc && argv[i+1][0] != '\0' && argv[i+1][1] == '\0') {
            // Delimiter as a separate single-byte argument: "-s ,".
            sepchar = argv[i+1][0];
            consumed_next = true;
          } else {
            std::cerr << "Argument to '-s' missing or multiple bytes" << std::endl;
            return 1;
          }
          break;
        default:
          std::cerr << "Invalid option '-" << arg[j] << "'" << std::endl;
          return 1;
      }
    }
    if (consumed_next) i++;
  }

  // Assign the positional field specs to their purposes. Purposes that were
  // not given keep an empty Fieldspec, which collects the empty string.
  Fieldspec specs[4];
  switch (fieldspecs.size()) {
    case 2:
      specs[ROW] = std::move(fieldspecs[0]);
      specs[VAL] = std::move(fieldspecs[1]);
      break;
    case 3:
      specs[ROW] = std::move(fieldspecs[0]);
      specs[COL] = std::move(fieldspecs[1]);
      specs[VAL] = std::move(fieldspecs[2]);
      break;
    case 4:
      specs[TAB] = std::move(fieldspecs[0]);
      specs[ROW] = std::move(fieldspecs[1]);
      specs[COL] = std::move(fieldspecs[2]);
      specs[VAL] = std::move(fieldspecs[3]);
      break;
    default:
      std::cerr << "Unexpected number of field specs; expected 2, 3 or 4" << std::endl;
      return 1;
  }

  // Need a box around the std::string to make sure their buffer stays stable
  // even if it does small-string optimisation
  std::vector<std::unique_ptr<std::string>> labels[3];
  std::map<std::tuple<int, int, int>, std::string> values;  // key: indices in tab, row, col

  {
    // string_views refer to the strings in labels
    std::unordered_map<std::string_view, int> labels_idx[3];  // only tab, row, col

    std::string line;
    std::vector<std::string> parts;
    std::string texts[4];
    while (std::getline(std::cin, line)) {
      // Split the line into fields on sepchar. Adjacent separators produce
      // empty fields; a trailing separator does not produce a trailing field.
      parts.clear();
      size_t cursor = 0;
      while (cursor < line.size()) {
        size_t idx = line.find(sepchar, cursor);
        if (idx == std::string::npos) idx = line.size();
        parts.emplace_back(line, cursor, idx - cursor);
        cursor = idx + 1;
      }
      if (parts.empty()) continue;  // empty line, no fields

      for (int i = 0; i < 4; i++) collect(texts[i], specs[i], parts, sepchar);

      // check that all parts we need are indeed there
      bool present = true;
      for (int i = 0; i < 4; i++) {
        if (!specs[i].empty() && texts[i].empty()) {
          present = false;
          break;
        }
      }
      if (!present) continue;

      // add to the label lists and collect indices
      int idxs[3];
      for (int i = 0; i < 3; i++) {
        auto it = labels_idx[i].find(texts[i]);
        if (it == labels_idx[i].end()) {
          idxs[i] = static_cast<int>(labels[i].size());
          labels[i].push_back(std::make_unique<std::string>(std::move(texts[i])));
          texts[i].clear();
          labels_idx[i].emplace(std::string_view(*labels[i].back()), idxs[i]);
        } else {
          idxs[i] = it->second;
        }
      }

      // Store the value at the appropriate index triplet. try_emplace leaves
      // its arguments untouched when the key already exists, so texts[VAL] is
      // still intact for appending to the existing cell in that case.
      auto [cell, inserted] = values.try_emplace(
          std::make_tuple(idxs[TAB], idxs[ROW], idxs[COL]), std::move(texts[VAL]));
      if (!inserted) {
        cell->second += sepchar;
        cell->second += texts[VAL];
      }
    }
  }

  int leftwid = 0;  // does not include table name
  for (const std::unique_ptr<std::string> &rowname : labels[ROW])
    leftwid = std::max<int>(leftwid, swidth(*rowname));

  std::vector<int> collabwid(labels[COL].size());
  for (int coli = 0; coli < (int)labels[COL].size(); coli++)
    collabwid[coli] = swidth(*labels[COL][coli]);

  std::vector<int> rowlabwid(labels[ROW].size());
  for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++)
    rowlabwid[rowi] = swidth(*labels[ROW][rowi]);

  for (int tabi = 0; tabi < (int)labels[TAB].size(); tabi++) {
    if (tabi > 0) std::cout << '\n';

    const std::string &tabname = *labels[TAB][tabi];
    const int thisleftwid = std::max<int>(leftwid, swidth(tabname));

    // Widest value per column within this table.
    std::vector<int> colvalwid(labels[COL].size());
    for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
      for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++) {
        auto it = values.find(std::make_tuple(tabi, rowi, coli));
        if (it != values.end())
          colvalwid[coli] = std::max<int>(colvalwid[coli], swidth(it->second));
      }
    }

    // Column width: the wider of the column label and its widest value.
    std::vector<int> colwid(labels[COL].size());
    for (int coli = 0; coli < (int)labels[COL].size(); coli++)
      colwid[coli] = std::max(collabwid[coli], colvalwid[coli]);

    // The header line is only printed when a column spec was given.
    if (!specs[COL].empty()) {
      std::cout << Left(tabname, thisleftwid, swidth(tabname));
      for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
        std::cout << ' ' << Left(*labels[COL][coli], colwid[coli], collabwid[coli]);
      }
      std::cout << '\n';
    }

    for (int rowi = 0; rowi < (int)labels[ROW].size(); rowi++) {
      std::cout << Left(*labels[ROW][rowi], thisleftwid, rowlabwid[rowi]);

      for (int coli = 0; coli < (int)labels[COL].size(); coli++) {
        auto it = values.find(std::make_tuple(tabi, rowi, coli));
        if (it != values.end()) {
          // Right-align the value within the widest value of the column, then
          // centre that block within the full column width.
          std::cout << ' ' << AlignBase<Right, Dir::center>(Right(it->second, colvalwid[coli], swidth(it->second)), colwid[coli], colvalwid[coli]);
        } else {
          std::cout << Spaces(colwid[coli] + 1);
        }
      }
      std::cout << '\n';
    }
  }

  std::cout << std::flush;
}
