#include #include #include #include #include #include #include #include #include #include #include "cpp-window/window.h" class Audio { public: Audio(const char *fname) { SF_INFO sf_info; sf_info.format = 0; SNDFILE *sndfile = sf_open(fname, SFM_READ, &sf_info); if (!sndfile) { std::cerr << sf_strerror(nullptr) << std::endl; exit(1); } nch = sf_info.channels; nfr = sf_info.frames; srate = sf_info.samplerate; data.resize(nch * nfr); size_t nread = sf_readf_double(sndfile, data.data(), nfr); if (nread != nfr) throw std::runtime_error("Failed to parse audio file"); int err = sf_close(sndfile); if (err != 0) throw std::runtime_error(sf_error_number(err)); } size_t channels() const { return nch; } size_t frames() const { return nfr; } size_t sample_rate() const { return srate; } double at(size_t channel, size_t frame) const { assert(channel < nch && frame < nfr); return data[nch * frame + channel]; } class Channel; Channel channel(size_t channel) const { assert(channel < nch); return Channel(*this, channel); } class Channel { public: double at(size_t frame) const { return audio.at(ch, frame); } size_t frames() const { return audio.frames(); } size_t sample_rate() const { return audio.sample_rate(); } private: Channel(const Audio &audio, size_t ch) : audio{audio}, ch{ch} {} const Audio &audio; const size_t ch; friend Channel Audio::channel(size_t) const; }; private: size_t srate, nch, nfr; std::vector data; }; class FFTW { public: static FFTW plan(size_t N) { FFTW fftw{N}; fftw.in = fftw_alloc_real(N); assert(fftw.in); fftw.out = fftw_alloc_real(N); assert(fftw.out); fftw.pl = fftw_plan_r2r_1d(N, fftw.in, fftw.out, FFTW_R2HC, FFTW_MEASURE); assert(fftw.pl); return fftw; } ~FFTW() { fftw_free(in); fftw_free(out); fftw_destroy_plan(pl); } inline size_t length() const { return N; } inline double* input() { return in; } inline double* output() { return out; } void execute() { fftw_execute(pl); } private: FFTW(size_t N) : N{N} {} const size_t N; double *in, *out; fftw_plan pl; }; class Spectrogram { public: Spectrogram(const Audio::Channel &chan, size_t resol) : resol{resol} , samplerate{chan.sample_rate()} , specs(chan.frames() / resol) { FFTW fftw = FFTW::plan(resol); for (size_t i = 0; i < specs.size(); i++) { for (size_t j = 0; j < resol; j++) { fftw.input()[j] = chan.at(resol * i + j); } fftw.execute(); const double *output = fftw.output(); specs[i].resize(resol / 2 + 1); specs[i][0] = output[0]; for (size_t j = 1; j < (resol + 1) / 2; j++) { specs[i][j] = {output[j], output[resol - j]}; } if (resol % 2 == 0) { specs[i][resol / 2] = output[resol / 2]; } } } size_t length() const { return specs.size(); } size_t resolution() const { return resol; } const std::vector>& at(size_t index) const { return specs[index]; } // Takes index into one spectrogram vector double to_hz(size_t spec_idx) const { return spec_idx * (double)samplerate / resol; } // Returns index into one spectrogram vector; might be out of range size_t from_hz(double hz) const { return hz * resol / samplerate; } private: const size_t resol; const size_t samplerate; std::vector>> specs; }; double hz_to_key(double hz) { return log2(hz / 27.5) * 12 + 1; } double key_to_hz(double key) { return pow(2, (key - 1) / 12) * 27.5; } int main(int argc, char **argv) { if (argc != 2) { std::cerr << "Usage: " << argv[0] << " " << std::endl; return 1; } const char *audio_fname = argv[1]; std::cout << "Reading audio file..." << std::flush; const Audio audio{audio_fname}; std::cout << " done" << std::endl; std::cout << "Channels: " << audio.channels(); if (audio.channels() > 1) std::cout << " (choosing channel 0)"; std::cout << std::endl; Audio::Channel chan = audio.channel(0); std::cout << "Frames: " << chan.frames() << std::endl; std::cout << "Sample rate: " << chan.sample_rate() << std::endl; std::cout << "Duration: " << (double)chan.frames() / chan.sample_rate() << "s" << std::endl; std::cout << "Creating spectrogram..." << std::flush; Spectrogram spectro{chan, 4096}; std::cout << " done" << std::endl; const double start_sec = 9, end_sec = 21; // const double start_sec = 6*60+1, end_sec = 6*60+24; const double start_frame = start_sec * chan.sample_rate(); const double end_frame = end_sec * chan.sample_rate(); Window{"Audio", 640, 480, Window::Opts{}.resizable(true)}.event_loop( [&](const SDL_Event &e) { if (e.type == SDL_KEYDOWN && e.key.keysym.sym == SDLK_q) { return Window::ACT_STOP; } return Window::ACT_OK; }, [&](Window::Buffer &buffer) { // std::cout << "redraw" << std::endl; buffer.clear({0, 0, 0}); using Clr = Window::Buffer::Clr; const auto draw_bar = [&buffer](float x, float y1, float y2, Clr clr) { buffer.plotf(x, y1, clr); for (int y = std::ceil(y1); y <= y2; y++) buffer.plotf(x, y, clr); buffer.plotf(x, y2, clr); }; const double low_key = 1; const double high_key = 88; const double low_hz = key_to_hz(low_key); const double high_hz = key_to_hz(high_key); // + 1 because of rounding down const size_t low_idx = std::max(0, spectro.from_hz(low_hz) + 1); const size_t high_idx = std::min(spectro.at(0).size(), spectro.from_hz(high_hz)); const auto key_to_y = [&buffer, &low_key, &high_key](double key) -> double { return buffer.height() - 1 - (key - low_key) / (high_key - low_key) * (buffer.height() - 1); }; // std::cout << "length = " << spectro.at(0).size() << std::endl; // std::cout << "low_idx=" << low_idx << " high_idx=" << high_idx << std::endl; // std::cout << "buffer height = " << buffer.height() << std::endl; // const size_t key = high_key - 1; // std::cout << key << ' ' << key_to_hz(key) << ' ' << spectro.from_hz(key_to_hz(key)) << ' ' << spectro.to_hz(spectro.from_hz(key_to_hz(key))) << ' ' << hz_to_key(spectro.to_hz(spectro.from_hz(key_to_hz(key)))) << std::endl; for (int x = 0; x < buffer.width(); x++) { const size_t si = (start_frame + x * (end_frame - start_frame) / (buffer.width() - 1)) / spectro.resolution(); const std::vector> &spec = spectro.at(si); // std::cout << "x=" << x << " si=" << si << std::endl; for (size_t j = low_idx; j < high_idx; j++) { const double key1 = hz_to_key(spectro.to_hz(j)); const double key2 = hz_to_key(spectro.to_hz(j+1)); const float y1 = key_to_y(key1); const float y2 = key_to_y(key2); const float s = std::abs(spec[j]) / 8; // std::cout << "y1=" << y1 << " y2=" << y2 << " j=" << j << " s=" << s << std::endl; const float alpha = s / (s + 1); const Clr clr = Clr{255*alpha, 100*alpha, 100*alpha}; draw_bar(x, y2, y1, clr); } if (x % 100 == 0) { for (int key = 4; key <= 88; key += 12) { buffer.plotf(x, key_to_y(key), Clr{255, 255, 255}); } } } } ); }