From b85687598eb26398c4bf2df43509ee5a2c7bbc74 Mon Sep 17 00:00:00 2001
From: Tom Smeding
Date: Mon, 13 Apr 2020 09:54:02 +0200
Subject: Initial

---
 .gitignore  |   5 ++
 .gitmodules |   3 +
 Makefile    |  28 +++++++
 cpp-window  |   1 +
 main.cpp    | 304 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 341 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 100644 Makefile
 create mode 160000 cpp-window
 create mode 100644 main.cpp

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..66746e6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+obj/
+compile_commands.json
+*.mp4
+*.wav
+recog
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..b04a97b
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "cpp-window"]
+	path = cpp-window
+	url = git@tomsmeding.com:cpp-window
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5e22976
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,28 @@
+CXX = g++
+CXXFLAGS = -Wall -Wextra -std=c++17 -O2 -g
+LDFLAGS = -lsndfile -lfftw3
+TARGET = recog
+
+CXXFLAGS += $(shell pkg-config --cflags sdl2)
+LDFLAGS += $(shell pkg-config --libs sdl2)
+
+OBJDIR = obj
+
+.PHONY: all clean
+
+all: $(TARGET)
+
+clean:
+	@echo "Cleaning"
+	@rm -f $(TARGET)
+	@rm -rf $(OBJDIR)
+
+
+$(OBJDIR)/%.o: %.cpp $(wildcard *.h)
+	@mkdir -p $(OBJDIR)
+	@echo "CXX $<"
+	@$(CXX) $(CXXFLAGS) -c -o $@ $<
+
+$(TARGET): $(patsubst %.cpp,$(OBJDIR)/%.o,$(wildcard *.cpp))
+	@echo "LD -o $@"
+	@$(CXX) -o $@ $^ $(LDFLAGS)
diff --git a/cpp-window b/cpp-window
new file mode 160000
index 0000000..dccf7bf
--- /dev/null
+++ b/cpp-window
@@ -0,0 +1 @@
+Subproject commit dccf7bf421ca62ec8f1c470be4aa72d60dd7201d
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..3a919c7
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,304 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <complex>
+#include <cstdlib>
+#include <iostream>
+#include <stdexcept>
+#include <vector>
+#include <fftw3.h>
+#include <sndfile.h>
+#include "cpp-window/window.h"
+
+
+// Interleaved samples of a whole audio file, decoded through libsndfile.
+class Audio {
+public:
+	// Reads every frame of `fname` as doubles. Prints the libsndfile error and
+	// exits if the file cannot be opened; throws std::runtime_error if it
+	// cannot be read completely or closed cleanly.
+	Audio(const char *fname) {
+		SF_INFO sf_info;
+		sf_info.format = 0;
+		SNDFILE *sndfile = sf_open(fname, SFM_READ, &sf_info);
+		if (!sndfile) {
+			std::cerr << sf_strerror(nullptr) << std::endl;
+			exit(1);
+		}
+
+		nch = sf_info.channels;
+		nfr = sf_info.frames;
+		srate = sf_info.samplerate;
+
+		data.resize(nch * nfr);
+
+		size_t nread = sf_readf_double(sndfile, data.data(), nfr);
+		if (nread != nfr) {
+			// Close the handle first so a short read does not leak it.
+			sf_close(sndfile);
+			throw std::runtime_error("Failed to parse audio file");
+		}
+
+		int err = sf_close(sndfile);
+		if (err != 0) throw std::runtime_error(sf_error_number(err));
+	}
+
+	size_t channels() const { return nch; }
+	size_t frames() const { return nfr; }
+	size_t sample_rate() const { return srate; }
+
+	// Sample of `channel` at `frame`; samples are stored frame-major.
+	double at(size_t channel, size_t frame) const {
+		assert(channel < nch && frame < nfr);
+		return data[nch * frame + channel];
+	}
+
+	class Channel;
+
+	// View of one channel; it refers to this Audio and must not outlive it.
+	Channel channel(size_t channel) const {
+		assert(channel < nch);
+		return Channel(*this, channel);
+	}
+
+	class Channel {
+	public:
+		double at(size_t frame) const { return audio.at(ch, frame); }
+		size_t frames() const { return audio.frames(); }
+		size_t sample_rate() const { return audio.sample_rate(); }
+
+	private:
+		Channel(const Audio &audio, size_t ch) : audio{audio}, ch{ch} {}
+
+		const Audio &audio;
+		const size_t ch;
+
+		friend Channel Audio::channel(size_t) const;
+	};
+
+private:
+	size_t srate, nch, nfr;
+	std::vector<double> data;
+};
+
+// Owns the buffers and plan of one fixed-length real-to-halfcomplex transform.
+class FFTW {
+public:
+	// Allocates both buffers and plans a transform of N real samples.
+	static FFTW plan(size_t N) {
+		FFTW fftw{N};
+		fftw.in = fftw_alloc_real(N);
+		assert(fftw.in);
+		fftw.out = fftw_alloc_real(N);
+		assert(fftw.out);
+		fftw.pl = fftw_plan_r2r_1d(N, fftw.in, fftw.out, FFTW_R2HC, FFTW_MEASURE);
+		assert(fftw.pl);
+		return fftw;
+	}
+
+	// A copy would free the same buffers and plan a second time.
+	FFTW(const FFTW&) = delete;
+	FFTW& operator=(const FFTW&) = delete;
+
+	// Takes over the resources; the source is emptied so it frees nothing.
+	FFTW(FFTW &&other) noexcept
+		: N{other.N}, in{other.in}, out{other.out}, pl{other.pl}
+	{
+		other.in = nullptr;
+		other.out = nullptr;
+		other.pl = nullptr;
+	}
+
+	~FFTW() {
+		if (in) fftw_free(in);
+		if (out) fftw_free(out);
+		if (pl) fftw_destroy_plan(pl);
+	}
+
+	inline size_t length() const { return N; }
+	inline double* input() { return in; }
+	inline double* output() { return out; }
+
+	void execute() { fftw_execute(pl); }
+
+private:
+	FFTW(size_t N) : N{N} {}
+
+	const size_t N;
+	double *in = nullptr, *out = nullptr;
+	fftw_plan pl = nullptr;
+};
+
+// Spectra of consecutive, non-overlapping windows of `resol` frames each.
+class Spectrogram {
+public:
+	// Transforms every complete window of `chan`; a trailing partial window
+	// is dropped. `resol` must be nonzero.
+	Spectrogram(const Audio::Channel &chan, size_t resol)
+		: resol{resol}
+		, samplerate{chan.sample_rate()}
+		, specs(chan.frames() / resol)
+	{
+		FFTW fftw = FFTW::plan(resol);
+
+		for (size_t i = 0; i < specs.size(); i++) {
+			for (size_t j = 0; j < resol; j++) {
+				fftw.input()[j] = chan.at(resol * i + j);
+			}
+
+			fftw.execute();
+
+			// Halfcomplex layout: real parts come first, imaginary parts are
+			// stored from the end of the array backwards.
+			const double *output = fftw.output();
+			specs[i].resize(resol / 2 + 1);
+			specs[i][0] = output[0];
+			for (size_t j = 1; j < (resol + 1) / 2; j++) {
+				specs[i][j] = {output[j], output[resol - j]};
+			}
+			if (resol % 2 == 0) {
+				specs[i][resol / 2] = output[resol / 2];
+			}
+		}
+	}
+
+	size_t length() const { return specs.size(); }
+	size_t resolution() const { return resol; }
+
+	const std::vector<std::complex<double>>& at(size_t index) const {
+		return specs[index];
+	}
+
+	// Takes index into one spectrogram vector
+	double to_hz(size_t spec_idx) const {
+		return spec_idx * (double)samplerate / resol;
+	}
+
+	// Returns index into one spectrogram vector; might be out of range
+	size_t from_hz(double hz) const {
+		return hz * resol / samplerate;
+	}
+
+private:
+	const size_t resol;
+	const size_t samplerate;
+	std::vector<std::vector<std::complex<double>>> specs;
+};
+
+// Key number of a frequency (key 1 is 27.5 Hz, 12 keys per octave); may be fractional.
+double hz_to_key(double hz) {
+	return log2(hz / 27.5) * 12 + 1;
+}
+
+// Inverse of hz_to_key.
+double key_to_hz(double key) {
+	return pow(2, (key - 1) / 12) * 27.5;
+}
+
+int main(int argc, char **argv) {
+	if (argc != 2) {
+		std::cerr << "Usage: " << argv[0] << " <audio file>" << std::endl;
+		return 1;
+	}
+
+	const char *audio_fname = argv[1];
+
+	std::cout << "Reading audio file..." << std::flush;
+	const Audio audio{audio_fname};
+	std::cout << " done" << std::endl;
+
+	std::cout << "Channels: " << audio.channels();
+	if (audio.channels() > 1) std::cout << " (choosing channel 0)";
+	std::cout << std::endl;
+
+	Audio::Channel chan = audio.channel(0);
+	std::cout << "Frames: " << chan.frames() << std::endl;
+	std::cout << "Sample rate: " << chan.sample_rate() << std::endl;
+	std::cout << "Duration: " << (double)chan.frames() / chan.sample_rate() << "s" << std::endl;
+
+	std::cout << "Creating spectrogram..." << std::flush;
+	Spectrogram spectro{chan, 4096};
+	std::cout << " done" << std::endl;
+
+	// The redraw callback indexes the spectrogram, so it needs one window.
+	if (spectro.length() == 0) {
+		std::cerr << "Audio file is too short to analyse" << std::endl;
+		return 1;
+	}
+
+	const double start_sec = 9, end_sec = 21;
+	// const double start_sec = 6*60+1, end_sec = 6*60+24;
+	const double start_frame = start_sec * chan.sample_rate();
+	const double end_frame = end_sec * chan.sample_rate();
+
+	Window{"Audio", 640, 480, Window::Opts{}.resizable(true)}.event_loop(
+		[&](const SDL_Event &e) {
+			if (e.type == SDL_KEYDOWN && e.key.keysym.sym == SDLK_q) {
+				return Window::ACT_STOP;
+			}
+			return Window::ACT_OK;
+		},
+		[&](Window::Buffer &buffer) {
+			// std::cout << "redraw" << std::endl;
+			buffer.clear({0, 0, 0});
+
+			using Clr = Window::Buffer::Clr;
+
+			const auto draw_bar = [&buffer](float x, float y1, float y2, Clr clr) {
+				buffer.plotf(x, y1, clr);
+				for (int y = std::ceil(y1); y <= y2; y++) buffer.plotf(x, y, clr);
+				buffer.plotf(x, y2, clr);
+			};
+
+			const double low_key = 1;
+			const double high_key = 88;
+			const double low_hz = key_to_hz(low_key);
+			const double high_hz = key_to_hz(high_key);
+			// + 1 because of rounding down
+			const size_t low_idx = std::max<size_t>(0, spectro.from_hz(low_hz) + 1);
+			const size_t high_idx = std::min(spectro.at(0).size(), spectro.from_hz(high_hz));
+
+			const auto key_to_y = [&buffer, &low_key, &high_key](double key) -> double {
+				return buffer.height() - 1 - (key - low_key) / (high_key - low_key) * (buffer.height() - 1);
+			};
+
+			// std::cout << "length = " << spectro.at(0).size() << std::endl;
+			// std::cout << "low_idx=" << low_idx << " high_idx=" << high_idx << std::endl;
+
+			// std::cout << "buffer height = " << buffer.height() << std::endl;
+
+			// const size_t key = high_key - 1;
+			// std::cout << key << ' ' << key_to_hz(key) << ' ' << spectro.from_hz(key_to_hz(key)) << ' ' << spectro.to_hz(spectro.from_hz(key_to_hz(key))) << ' ' << hz_to_key(spectro.to_hz(spectro.from_hz(key_to_hz(key)))) << std::endl;
+
+			// A buffer one pixel wide would otherwise divide by zero below.
+			const int x_span = std::max(1, static_cast<int>(buffer.width()) - 1);
+
+			for (int x = 0; x < buffer.width(); x++) {
+				const size_t wanted = (start_frame + x * (end_frame - start_frame) / x_span) / spectro.resolution();
+				// Audio shorter than end_sec has fewer windows; stay in range.
+				const size_t si = std::min(wanted, spectro.length() - 1);
+				const std::vector<std::complex<double>> &spec = spectro.at(si);
+
+				// std::cout << "x=" << x << " si=" << si << std::endl;
+
+				for (size_t j = low_idx; j < high_idx; j++) {
+					const double key1 = hz_to_key(spectro.to_hz(j));
+					const double key2 = hz_to_key(spectro.to_hz(j+1));
+					const float y1 = key_to_y(key1);
+					const float y2 = key_to_y(key2);
+					const float s = std::abs(spec[j]) / 8;
+					// std::cout << "y1=" << y1 << " y2=" << y2 << " j=" << j << " s=" << s << std::endl;
+					const float alpha = s / (s + 1);
+					const Clr clr = Clr{255*alpha, 100*alpha, 100*alpha};
+					draw_bar(x, y2, y1, clr);
+				}
+
+				if (x % 100 == 0) {
+					for (int key = 4; key <= 88; key += 12) {
+						buffer.plotf(x, key_to_y(key), Clr{255, 255, 255});
+					}
+				}
+			}
+		}
+	);
+}
-- 
cgit v1.2.3-70-g09d2