path: root/main.cpp
diff options
Diffstat (limited to 'main.cpp')
1 files changed, 261 insertions, 0 deletions
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..3a919c7
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,261 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>

#include <fftw3.h>
#include <sndfile.h>

#include "cpp-window/window.h"
+class Audio {
+ Audio(const char *fname) {
+ SF_INFO sf_info;
+ sf_info.format = 0;
+ SNDFILE *sndfile = sf_open(fname, SFM_READ, &sf_info);
+ if (!sndfile) {
+ std::cerr << sf_strerror(nullptr) << std::endl;
+ exit(1);
+ }
+ nch = sf_info.channels;
+ nfr = sf_info.frames;
+ srate = sf_info.samplerate;
+ data.resize(nch * nfr);
+ size_t nread = sf_readf_double(sndfile,, nfr);
+ if (nread != nfr) throw std::runtime_error("Failed to parse audio file");
+ int err = sf_close(sndfile);
+ if (err != 0) throw std::runtime_error(sf_error_number(err));
+ }
+ size_t channels() const { return nch; }
+ size_t frames() const { return nfr; }
+ size_t sample_rate() const { return srate; }
+ double at(size_t channel, size_t frame) const {
+ assert(channel < nch && frame < nfr);
+ return data[nch * frame + channel];
+ }
+ class Channel;
+ Channel channel(size_t channel) const {
+ assert(channel < nch);
+ return Channel(*this, channel);
+ }
+ class Channel {
+ public:
+ double at(size_t frame) const { return, frame); }
+ size_t frames() const { return audio.frames(); }
+ size_t sample_rate() const { return audio.sample_rate(); }
+ private:
+ Channel(const Audio &audio, size_t ch) : audio{audio}, ch{ch} {}
+ const Audio &audio;
+ const size_t ch;
+ friend Channel Audio::channel(size_t) const;
+ };
+ size_t srate, nch, nfr;
+ std::vector<double> data;
+class FFTW {
+ static FFTW plan(size_t N) {
+ FFTW fftw{N};
+ = fftw_alloc_real(N);
+ assert(;
+ fftw.out = fftw_alloc_real(N);
+ assert(fftw.out);
+ = fftw_plan_r2r_1d(N,, fftw.out, FFTW_R2HC, FFTW_MEASURE);
+ assert(;
+ return fftw;
+ }
+ ~FFTW() {
+ fftw_free(in);
+ fftw_free(out);
+ fftw_destroy_plan(pl);
+ }
+ inline size_t length() const { return N; }
+ inline double* input() { return in; }
+ inline double* output() { return out; }
+ void execute() { fftw_execute(pl); }
+ FFTW(size_t N) : N{N} {}
+ const size_t N;
+ double *in, *out;
+ fftw_plan pl;
+class Spectrogram {
+ Spectrogram(const Audio::Channel &chan, size_t resol)
+ : resol{resol}
+ , samplerate{chan.sample_rate()}
+ , specs(chan.frames() / resol)
+ {
+ FFTW fftw = FFTW::plan(resol);
+ for (size_t i = 0; i < specs.size(); i++) {
+ for (size_t j = 0; j < resol; j++) {
+ fftw.input()[j] = * i + j);
+ }
+ fftw.execute();
+ const double *output = fftw.output();
+ specs[i].resize(resol / 2 + 1);
+ specs[i][0] = output[0];
+ for (size_t j = 1; j < (resol + 1) / 2; j++) {
+ specs[i][j] = {output[j], output[resol - j]};
+ }
+ if (resol % 2 == 0) {
+ specs[i][resol / 2] = output[resol / 2];
+ }
+ }
+ }
+ size_t length() const { return specs.size(); }
+ size_t resolution() const { return resol; }
+ const std::vector<std::complex<double>>& at(size_t index) const {
+ return specs[index];
+ }
+ // Takes index into one spectrogram vector
+ double to_hz(size_t spec_idx) const {
+ return spec_idx * (double)samplerate / resol;
+ }
+ // Returns index into one spectrogram vector; might be out of range
+ size_t from_hz(double hz) const {
+ return hz * resol / samplerate;
+ }
+ const size_t resol;
+ const size_t samplerate;
+ std::vector<std::vector<std::complex<double>>> specs;
/// Converts a frequency in Hz to a (fractional) key number, where key 1 is
/// 27.5 Hz and each key is one twelfth of an octave higher than the previous.
/// Inverse of key_to_hz().
double hz_to_key(double hz) {
    return std::log2(hz / 27.5) * 12 + 1;
}
/// Converts a (fractional) key number to its frequency in Hz, where key 1 is
/// 27.5 Hz and every 12 keys double the frequency. Inverse of hz_to_key().
double key_to_hz(double key) {
    return std::pow(2, (key - 1) / 12) * 27.5;
}
+int main(int argc, char **argv) {
+ if (argc != 2) {
+ std::cerr << "Usage: " << argv[0] << " <video.wav>" << std::endl;
+ return 1;
+ }
+ const char *audio_fname = argv[1];
+ std::cout << "Reading audio file..." << std::flush;
+ const Audio audio{audio_fname};
+ std::cout << " done" << std::endl;
+ std::cout << "Channels: " << audio.channels();
+ if (audio.channels() > 1) std::cout << " (choosing channel 0)";
+ std::cout << std::endl;
+ Audio::Channel chan =;
+ std::cout << "Frames: " << chan.frames() << std::endl;
+ std::cout << "Sample rate: " << chan.sample_rate() << std::endl;
+ std::cout << "Duration: " << (double)chan.frames() / chan.sample_rate() << "s" << std::endl;
+ std::cout << "Creating spectrogram..." << std::flush;
+ Spectrogram spectro{chan, 4096};
+ std::cout << " done" << std::endl;
+ const double start_sec = 9, end_sec = 21;
+ // const double start_sec = 6*60+1, end_sec = 6*60+24;
+ const double start_frame = start_sec * chan.sample_rate();
+ const double end_frame = end_sec * chan.sample_rate();
+ Window{"Audio", 640, 480, Window::Opts{}.resizable(true)}.event_loop(
+ [&](const SDL_Event &e) {
+ if (e.type == SDL_KEYDOWN && e.key.keysym.sym == SDLK_q) {
+ return Window::ACT_STOP;
+ }
+ return Window::ACT_OK;
+ },
+ [&](Window::Buffer &buffer) {
+ // std::cout << "redraw" << std::endl;
+ buffer.clear({0, 0, 0});
+ using Clr = Window::Buffer::Clr;
+ const auto draw_bar = [&buffer](float x, float y1, float y2, Clr clr) {
+ buffer.plotf(x, y1, clr);
+ for (int y = std::ceil(y1); y <= y2; y++) buffer.plotf(x, y, clr);
+ buffer.plotf(x, y2, clr);
+ };
+ const double low_key = 1;
+ const double high_key = 88;
+ const double low_hz = key_to_hz(low_key);
+ const double high_hz = key_to_hz(high_key);
+ // + 1 because of rounding down
+ const size_t low_idx = std::max<size_t>(0, spectro.from_hz(low_hz) + 1);
+ const size_t high_idx = std::min(, spectro.from_hz(high_hz));
+ const auto key_to_y = [&buffer, &low_key, &high_key](double key) -> double {
+ return buffer.height() - 1 - (key - low_key) / (high_key - low_key) * (buffer.height() - 1);
+ };
+ // std::cout << "length = " << << std::endl;
+ // std::cout << "low_idx=" << low_idx << " high_idx=" << high_idx << std::endl;
+ // std::cout << "buffer height = " << buffer.height() << std::endl;
+ // const size_t key = high_key - 1;
+ // std::cout << key << ' ' << key_to_hz(key) << ' ' << spectro.from_hz(key_to_hz(key)) << ' ' << spectro.to_hz(spectro.from_hz(key_to_hz(key))) << ' ' << hz_to_key(spectro.to_hz(spectro.from_hz(key_to_hz(key)))) << std::endl;
+ for (int x = 0; x < buffer.width(); x++) {
+ const size_t si = (start_frame + x * (end_frame - start_frame) / (buffer.width() - 1)) / spectro.resolution();
+ const std::vector<std::complex<double>> &spec =;
+ // std::cout << "x=" << x << " si=" << si << std::endl;
+ for (size_t j = low_idx; j < high_idx; j++) {
+ const double key1 = hz_to_key(spectro.to_hz(j));
+ const double key2 = hz_to_key(spectro.to_hz(j+1));
+ const float y1 = key_to_y(key1);
+ const float y2 = key_to_y(key2);
+ const float s = std::abs(spec[j]) / 8;
+ // std::cout << "y1=" << y1 << " y2=" << y2 << " j=" << j << " s=" << s << std::endl;
+ const float alpha = s / (s + 1);
+ const Clr clr = Clr{255*alpha, 100*alpha, 100*alpha};
+ draw_bar(x, y2, y1, clr);
+ }
+ if (x % 100 == 0) {
+ for (int key = 4; key <= 88; key += 12) {
+ buffer.plotf(x, key_to_y(key), Clr{255, 255, 255});
+ }
+ }
+ }
+ }
+ );