#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

// Print the help text to stderr.
// NOTE(review): the <...> placeholders in the three synopsis lines and the
// column alignment of the text were lost in the copy this was restored from;
// they are reconstructed from how main() assigns the positional field specs
// (2 specs: row, value; 3: row, col, value; 4: table, row, col, value).
static void usage(const char *argv0) {
    std::cerr <<
        "Usage: " << argv0 << " [options] <row> <value>\n"
        "       " << argv0 << " [options] <row> <col> <value>\n"
        "       " << argv0 << " [options] <table> <row> <col> <value>\n"
        "Splits lines on a separator character (-s) and creates tables from the result.\n"
        "Each positional argument is a field index specification (after splitting) that\n"
        "determines which parts of the input lines get used for what purpose.\n"
        "A field index specification is like that of the '-f' flag to cut(1). If multiple\n"
        "fields are specified for a particular purpose, they are joined using the\n"
        "separator character and henceforth treated as a single string. If a table cell\n"
        "has multiple values in the input, the values are appended using the separator\n"
        "character.\n"
        "Row, column and table labels are printed in order of occurrence in the input.\n"
        "UTF-8 is assumed for string-length calculation.\n"
        "\n"
        "Options:\n"
        "  -h        Show help.\n"
        "  -s DELIM  The character that separates fields. Default space (' ').\n";
}

// Inclusive range of zero-based field indices.
struct Range {
    int from, to;
};

// A parsed field index specification: a list of ranges, in the order given.
struct Fieldspec {
    std::vector<Range> ranges;

    bool empty() const { return ranges.empty(); }
};

// Parse a decimal integer at the start of `str`.
// Returns the value and a pointer to the first character after the number.
// Prints a message and exits with status 1 if no digits could be consumed or
// the value does not fit in an int.
static std::pair<int, const char*> parse_int(const char *str) {
    errno = 0;
    char *endp = nullptr;
    const long val = std::strtol(str, &endp, 10);
    const bool out_of_range =
        val < std::numeric_limits<int>::min() || val > std::numeric_limits<int>::max();
    // endp == str means strtol consumed nothing (empty string or no digits).
    if (endp == str || errno != 0 || out_of_range) {
        std::cerr << "Invalid number: " << str << std::endl;
        std::exit(1);
    }
    return {static_cast<int>(val), endp};
}

// Parse a comma-separated list of one-based field numbers or N-M ranges
// (e.g. "1,3-5") into zero-based inclusive ranges.
// Prints a message and exits with status 1 on malformed input, on field
// numbers below 1 (which would otherwise become negative indices), and on
// decreasing ranges.
static Fieldspec parse_fieldspec(const char *str) {
    Fieldspec spec;
    while (true) {
        int first;
        const char *endp;
        std::tie(first, endp) = parse_int(str);

        int last = first;
        if (*endp == '-') std::tie(last, endp) = parse_int(endp + 1);

        if (first < 1) {
            std::cerr << "Invalid field number (fields are numbered from 1): " << first << std::endl;
            std::exit(1);
        }
        if (last < first) {
            std::cerr << "Invalid decreasing range: " << first << '-' << last << std::endl;
            std::exit(1);
        }
        spec.ranges.push_back(Range{first - 1, last - 1});

        if (*endp == '\0') break;
        if (*endp != ',') {
            std::cerr << "Invalid character in field spec: '" << *endp << "'" << std::endl;
            std::exit(1);
        }
        str = endp + 1;
    }
    return spec;
}

// Overwrite `dest` with the fields of `parts` selected by `spec`, joined with
// `sepchar`. Indices beyond the end of `parts` are skipped. The separator is
// inserted between every pair of selected fields, including across separate
// ranges of the spec.
static void collect(std::string &dest, const Fieldspec &spec,
                    const std::vector<std::string> &parts, char sepchar) {
    dest.clear();
    const int nparts = static_cast<int>(parts.size());
    bool first = true;
    for (const Range &range : spec.ranges) {
        const int from = std::max(range.from, 0);
        const int to = std::min(range.to, nparts - 1);
        for (int i = from; i <= to; i++) {
            if (!first) dest += sepchar;
            first = false;
            dest += parts[i];
        }
    }
}

// Stream manipulator that writes `n` space characters (nothing if n <= 0).
struct Spaces {
    const int n;
    explicit Spaces(int n) : n{n} {}
};

static std::ostream& operator<<(std::ostream &os, Spaces spaces) {
    for (int i = 0; i < spaces.n; i++) os << ' ';
    return os;
}

// Display width of a string, counted as the number of bytes that are not
// UTF-8 continuation bytes (10xxxxxx).
static int swidth(const std::string &s) {
    int len = 0;
    for (char c : s) len += (c & 0xc0) != 0x80;
    return len;
}

enum class Dir { left, center, right };

// Stream manipulator that writes `s` padded with spaces to `outw` columns.
// `sw` is the width `s` itself occupies. T may itself be an AlignBase, which
// allows nesting alignments.
template <Dir dir, typename T>
struct AlignBase {
    T s;
    const int outw, sw;
    AlignBase(T s, int outw, int sw) : s(s), outw{outw}, sw{sw} {}
};

// The string is held by reference; these are only meant to be used as
// temporaries inside a single stream expression.
using Left = AlignBase<Dir::left, const std::string&>;
using Center = AlignBase<Dir::center, const std::string&>;
using Right = AlignBase<Dir::right, const std::string&>;

template <Dir dir, typename T>
std::ostream& operator<<(std::ostream &os, const AlignBase<dir, T> &al) {
    const int pad = al.outw - al.sw;
    if constexpr (dir == Dir::left) {
        return os << al.s << Spaces(pad);
    } else if constexpr (dir == Dir::center) {
        // An odd amount of padding puts the extra space on the left.
        return os << Spaces((pad + 1) / 2) << al.s << Spaces(pad / 2);
    } else {
        return os << Spaces(pad) << al.s;
    }
}

// Roles of the positional field specs; also used as array indices.
enum : int { TAB = 0, ROW = 1, COL = 2, VAL = 3 };

int main(int argc, char **argv) {
    char sepchar = ' ';
    std::vector<Fieldspec> fieldspecs;
    fieldspecs.reserve(4);

    for (int i = 1; i < argc; i++) {
        const char *arg = argv[i];
        if (arg[0] != '-') {
            fieldspecs.push_back(parse_fieldspec(arg));
            continue;
        }
        if (std::strcmp(arg, "--help") == 0) {
            usage(argv[0]);
            return 0;
        }
        for (int j = 1; arg[j] != '\0'; j++) {
            switch (arg[j]) {
                case 'h':
                    usage(argv[0]);
                    return 0;

                case 's':
                    if (arg[j + 1] != '\0') {
                        // Delimiter attached to the flag: -sX
                        sepchar = arg[j + 1];
                        j++;
                    } else if (i + 1 < argc && argv[i + 1][0] != '\0' && argv[i + 1][1] == '\0') {
                        // Delimiter is the next argument, which must be exactly
                        // one byte long. Skip it so that it is not parsed again
                        // as a field spec or option; `arg` still points at the
                        // flag, whose scan ends on the next loop test.
                        sepchar = argv[i + 1][0];
                        i++;
                    } else {
                        std::cerr << "Argument to '-s' missing or multiple bytes" << std::endl;
                        return 1;
                    }
                    break;

                default:
                    std::cerr << "Invalid option '-" << arg[j] << "'" << std::endl;
                    return 1;
            }
        }
    }

    Fieldspec specs[4];
    switch (fieldspecs.size()) {
        case 2:
            specs[ROW] = std::move(fieldspecs[0]);
            specs[VAL] = std::move(fieldspecs[1]);
            break;

        case 3:
            specs[ROW] = std::move(fieldspecs[0]);
            specs[COL] = std::move(fieldspecs[1]);
            specs[VAL] = std::move(fieldspecs[2]);
            break;

        case 4:
            specs[TAB] = std::move(fieldspecs[0]);
            specs[ROW] = std::move(fieldspecs[1]);
            specs[COL] = std::move(fieldspecs[2]);
            specs[VAL] = std::move(fieldspecs[3]);
            break;

        default:
            std::cerr << "Unexpected number of field specs; expected 2, 3 or 4" << std::endl;
            return 1;
    }

    // Need a box around the std::string to make sure their buffer stays stable
    // even if it does small-string optimisation
    std::vector<std::unique_ptr<std::string>> labels[3];
    std::map<std::tuple<int, int, int>, std::string> values;  // key: indices in tab, row, col

    {
        // string_views refer to the strings in labels
        std::unordered_map<std::string_view, int> labels_idx[3];  // only tab, row, col

        std::string line;
        std::vector<std::string> parts;
        std::string texts[4];

        while (std::getline(std::cin, line)) {
            // Split the line on the separator. A separator at the very end of
            // the line does not produce a trailing empty field.
            parts.clear();
            size_t cursor = 0;
            while (cursor < line.size()) {
                size_t idx = line.find(sepchar, cursor);
                if (idx == std::string::npos) idx = line.size();
                parts.emplace_back(line, cursor, idx - cursor);
                cursor = idx + 1;
            }

            if (parts.empty()) continue;  // empty line, no fields

            for (int i = 0; i < 4; i++) collect(texts[i], specs[i], parts, sepchar);

            // check that all parts we need are indeed there
            bool present = true;
            for (int i = 0; i < 4; i++) {
                if (!specs[i].empty() && texts[i].empty()) {
                    present = false;
                    break;
                }
            }
            if (!present) continue;

            // Add to the label lists and collect indices. A role without a
            // field spec has the empty string as its single label.
            int idxs[3];
            for (int i = 0; i < 3; i++) {
                auto it = labels_idx[i].find(texts[i]);
                if (it == labels_idx[i].end()) {
                    idxs[i] = static_cast<int>(labels[i].size());
                    labels[i].push_back(std::make_unique<std::string>(std::move(texts[i])));
                    texts[i].clear();
                    labels_idx[i].emplace(std::string_view(*labels[i].back()), idxs[i]);
                } else {
                    idxs[i] = it->second;
                }
            }

            // Store the value at the appropriate index triplet. try_emplace
            // leaves texts[VAL] untouched when the cell already exists, so it
            // can then be appended to the existing value.
            auto [cell, inserted] = values.try_emplace(
                std::make_tuple(idxs[TAB], idxs[ROW], idxs[COL]), std::move(texts[VAL]));
            if (!inserted) {
                cell->second += sepchar;
                cell->second += texts[VAL];
            }
        }
    }

    const int ntabs = static_cast<int>(labels[TAB].size());
    const int nrows = static_cast<int>(labels[ROW].size());
    const int ncols = static_cast<int>(labels[COL].size());

    int leftwid = 0;  // does not include table name
    for (const std::unique_ptr<std::string> &rowname : labels[ROW])
        leftwid = std::max(leftwid, swidth(*rowname));

    std::vector<int> collabwid(ncols);
    for (int coli = 0; coli < ncols; coli++)
        collabwid[coli] = swidth(*labels[COL][coli]);

    std::vector<int> rowlabwid(nrows);
    for (int rowi = 0; rowi < nrows; rowi++)
        rowlabwid[rowi] = swidth(*labels[ROW][rowi]);

    for (int tabi = 0; tabi < ntabs; tabi++) {
        if (tabi > 0) std::cout << '\n';

        const std::string &tabname = *labels[TAB][tabi];
        const int thisleftwid = std::max(leftwid, swidth(tabname));

        // Widest value per column within this table.
        std::vector<int> colvalwid(ncols);
        for (int coli = 0; coli < ncols; coli++) {
            for (int rowi = 0; rowi < nrows; rowi++) {
                auto it = values.find(std::make_tuple(tabi, rowi, coli));
                if (it != values.end())
                    colvalwid[coli] = std::max(colvalwid[coli], swidth(it->second));
            }
        }

        std::vector<int> colwid(ncols);
        for (int coli = 0; coli < ncols; coli++)
            colwid[coli] = std::max(collabwid[coli], colvalwid[coli]);

        // The header line is only printed when there are column labels.
        if (!specs[COL].empty()) {
            std::cout << Left(tabname, thisleftwid, swidth(tabname));
            for (int coli = 0; coli < ncols; coli++)
                std::cout << ' ' << Left(*labels[COL][coli], colwid[coli], collabwid[coli]);
            std::cout << '\n';
        }

        for (int rowi = 0; rowi < nrows; rowi++) {
            std::cout << Left(*labels[ROW][rowi], thisleftwid, rowlabwid[rowi]);
            for (int coli = 0; coli < ncols; coli++) {
                auto it = values.find(std::make_tuple(tabi, rowi, coli));
                if (it != values.end()) {
                    // Values are right-aligned relative to each other, and that
                    // block is then placed within the full column width.
                    // NOTE(review): the direction of the outer alignment was
                    // lost in the copy this was restored from; center is
                    // assumed here -- confirm against the intended layout.
                    std::cout << ' '
                              << AlignBase<Dir::center, Right>(
                                     Right(it->second, colvalwid[coli], swidth(it->second)),
                                     colwid[coli], colvalwid[coli]);
                } else {
                    std::cout << Spaces(colwid[coli] + 1);
                }
            }
            std::cout << '\n';
        }
    }

    std::cout << std::flush;
    return 0;
}