summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Smeding <tom.smeding@gmail.com> 2019-09-14 13:29:45 +0200
committer Tom Smeding <tom.smeding@gmail.com> 2019-09-14 13:29:45 +0200
commit 40ad7163a09b80d0ada0f6c4d5ad23427ea24665 (patch)
tree 2204890424e1f40eb8f9b925d0edb749df66133a
parent 17cabe7773137c777d75379284ed600df5c5e500 (diff)
Further cleanup and code+algorithm simplification
-rw-r--r-- histogram.cpp 134
1 files changed, 87 insertions, 47 deletions
diff --git a/histogram.cpp b/histogram.cpp
index 855776d..c7bbc6a 100644
--- a/histogram.cpp
+++ b/histogram.cpp
@@ -10,12 +10,15 @@
static const char *argv0;
void usage() {
- std::cerr << "Usage: " << argv0 << " [<lowbound> <highbound>] [<nbins>]" << std::endl
+ std::cerr << "Usage: " << argv0 << " [-h] [<lowbound> <highbound>] [<nbins>]" << std::endl
<< "Prints a simple histogram of stdin data in your terminal, using" << std::endl
<< "either the given lower and upper bounds, or the minimum and" << std::endl
<< "maximum extracted from the data." << std::endl
- << "The number of bins can also be passed." << std::endl
- << "Data on stdin is assumed to be a list of floating-point values." << std::endl;
+ << "The number of bins can also be passed. (If unspecified, defaults" << std::endl
+ << "to 10.)" << std::endl
+ << "Data on stdin is assumed to be a list of floating-point values." << std::endl
+ << std::endl
+ << " -h Show this help." << std::endl;
}
double parsefloat(const char *s, const char *errprefix) {
@@ -45,27 +48,46 @@ int main(int argc, char **argv) {
const int64_t BARWIDTH = 80;
- double low, high;
+ double low = 0, high = 1; // will be overwritten either from arguments or from data
bool havebounds;
int64_t nbins = 10;
- if (argc == 1) {
- havebounds = false;
- } else if (argc == 2) {
- havebounds = false;
- nbins = parseint(argv[1], "Invalid number");
- } else if (argc == 3) {
- havebounds = true;
- low = parsefloat(argv[1], "Invalid number");
- high = parsefloat(argv[2], "Invalid number");
- } else if (argc == 4) {
- havebounds = true;
- low = parsefloat(argv[1], "Invalid number");
- high = parsefloat(argv[2], "Invalid number");
- nbins = parseint(argv[3], "Invalid number");
- } else {
- usage();
- return 1;
+ std::vector<char*> plainargs;
+ for (int i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "-h") == 0) {
+ usage();
+ return 0;
+ } else {
+ plainargs.push_back(argv[i]);
+ }
+ }
+
+ switch (plainargs.size()) {
+ case 0:
+ havebounds = false;
+ break;
+
+ case 1:
+ havebounds = false;
+ nbins = parseint(plainargs[0], "Invalid number");
+ break;
+
+ case 2:
+ havebounds = true;
+ low = parsefloat(plainargs[0], "Invalid number");
+ high = parsefloat(plainargs[1], "Invalid number");
+ break;
+
+ case 3:
+ havebounds = true;
+ low = parsefloat(plainargs[0], "Invalid number");
+ high = parsefloat(plainargs[1], "Invalid number");
+ nbins = parseint(plainargs[2], "Invalid number");
+ break;
+
+ default:
+ usage();
+ return 1;
}
if (nbins < 1) {
@@ -80,26 +102,47 @@ int main(int argc, char **argv) {
return 1;
}
- double minval = INFINITY, maxval = -INFINITY;
+ // Read data and collect low and high bounds, if necessary
std::vector<double> values;
- while (true) {
- double v;
- std::cin >> v;
- if (!std::cin) break;
- values.push_back(v);
- if (v < minval) minval = v;
- if (v > maxval) maxval = v;
- }
- if (!havebounds) {
- low = minval;
- high = maxval;
+ {
+ double minval = INFINITY, maxval = -INFINITY;
+ while (true) {
+ double v;
+ std::cin >> v;
+ if (!std::cin) {
+ // Check whether it was invalid data or EOF that let the read fail
+ std::cin.clear();
+ std::string str;
+ std::cin >> str;
+ if (std::cin) {
+ std::cerr << "histogram: Invalid data starting with word '"
+ << str << "'" << std::endl;
+ usage();
+ return 1;
+ } else {
+ // EOF, so we're done
+ break;
+ }
+ }
+ values.push_back(v);
+ if (v < minval) minval = v;
+ if (v > maxval) maxval = v;
+ }
+ if (!havebounds) {
+ low = minval;
+ high = maxval;
+ }
}
- std::sort(values.begin(), values.end());
+ if (low == high) {
+ std::cerr << "histogram: Plot range contains only one point: " << low << std::endl;
+ usage();
+ return 1;
+ }
- std::vector<int64_t> histogram(nbins);
+ // Collect data in the histogram
+ std::vector<int64_t> histogram(nbins, 0);
- int64_t binidx = 0, tally = 0, maxtally = -1;
for (double v : values) {
if (v < low || v > high) {
// cerr << "Point " << v << " out of range!" << endl;
@@ -108,22 +151,18 @@ int main(int argc, char **argv) {
int64_t bin = (v - low) / (high - low) * nbins;
if (bin == nbins) bin--;
assert(bin >= 0 && bin < nbins);
- if (bin != binidx) {
- histogram[binidx] = tally;
- if (tally > maxtally) maxtally = tally;
- binidx = bin;
- tally = 0;
- }
- tally++;
+ histogram[bin]++;
}
- histogram[binidx] = tally;
- if (tally > maxtally) maxtally = tally;
+
+ // Compute statistics
+ int64_t maxtally = *std::max_element(histogram.begin(), histogram.end());
if (maxtally == 0) {
std::cerr << "histogram: No data in range" << std::endl;
return 1;
}
+ // Print histogram
char fullbar[BARWIDTH + 1];
memset(fullbar, '#', BARWIDTH);
fullbar[BARWIDTH] = '\0';
@@ -133,11 +172,12 @@ int main(int argc, char **argv) {
for (size_t i = 0; i < histogram.size(); i++) {
int64_t tally = histogram[i];
+ double binlow = low + (double)i / nbins * (high - low);
+ double binhigh = low + (double)(i + 1) / nbins * (high - low);
int64_t width = BARWIDTH * tally / maxtally;
char terminator = ")]"[i == histogram.size() - 1];
std::cout << fullbar + BARWIDTH - width << emptybar + width
- << " [" << std::setw(11) << low + (double)i / nbins * (high - low) << " - "
- << std::setw(11) << low + (double)(i + 1) / nbins * (high - low) << terminator
+ << " [" << std::setw(11) << binlow << " - " << std::setw(11) << binhigh << terminator
<< " [" << tally << "]" << std::endl;
}
}