1 files changed, 171 insertions, 0 deletions
diff --git a/mandel.cu b/mandel.cu
new file mode 100644
index 0000000..064cf86
--- /dev/null
+++ b/mandel.cu
@@ -0,0 +1,171 @@
+#include <iostream>
+#include <cassert>
+#include <cstdint>
+#include <sys/time.h>
+#include "mandel.h"
+#include "lodepng.h"
+#include "bmp.h"
+
+using namespace std;
+
+#define USE_GPU
+
+#define CUDA_CHECK(expr) { \
+		cudaError_t err_ = (expr); \
+		if (err_ != cudaSuccess) { \
+			cerr << "Cuda error: " << err_ << ": " << cudaGetErrorString(err_) << endl; \
+			cerr << "On " << __FILE__ << ":" << __LINE__ << ": " << #expr << endl; \
+			exit(1); \
+		} \
+	}
+
+struct Mandel {
+	int w, h;
+	int16_t *img;
+	int16_t *devImg;
+	Params *devPar;
+	Mandel *devCtx;  // yes
+
+	double imgh;  // transient; calculated from params each time
+};
+
+Params mandel_default_params() {
+	Params par;
+	par.cx = -0.5; par.cy = 0.0;
+	par.imgw = 3.5;
+	par.maxit = 512;
+	return par;
+}
+
+Mandel* mandel_init(int w, int h) {
+	Mandel *ctx = new Mandel;
+	ctx->w = w;
+	ctx->h = h;
+	ctx->img = new int16_t[w * h];
+#ifdef USE_GPU
+	CUDA_CHECK(cudaMalloc(&ctx->devImg, w * h * sizeof(int16_t)));
+	CUDA_CHECK(cudaMalloc(&ctx->devPar, sizeof(Params)));
+	CUDA_CHECK(cudaMalloc(&ctx->devCtx, sizeof(Mandel)));
+#endif
+	return ctx;
+}
+
+void mandel_free(Mandel *ctx) {
+	delete[] ctx->img;
+#ifdef USE_GPU
+	CUDA_CHECK(cudaFree(ctx->devImg));
+	CUDA_CHECK(cudaFree(ctx->devPar));
+	CUDA_CHECK(cudaFree(ctx->devCtx));
+#endif
+	delete ctx;
+}
+
+#define MANDEL_GENERIC(dst, ctx, par, ix, iy, idx) { \
+		const double x = (par).cx - (par).imgw / 2 + (par).imgw * (ix) / ((ctx).w-1); \
+		const double y = (par).cy - (ctx).imgh / 2 + (ctx).imgh * (iy) / ((ctx).h-1); \
+		double a = x, b = y, a2 = a * a, b2 = b * b; \
+		int16_t iter, maxiter = (par).maxit; \
+		for (iter = 0; iter < maxiter && a2 + b2 < 4; iter++) { \
+			b = 2 * a * b + y; a = a2 - b2 + x; \
+			a2 = a * a; b2 = b * b; \
+		} \
+		(dst)[idx] = iter; \
+	}
+
+__global__ void mandel_gpu(int16_t *dst, const Mandel *ctx, const Params *par) {
+	const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+	const int ix = idx % (int)ctx->w, iy = ctx->h - 1 - idx / (int)ctx->w;
+	if (iy >= ctx->h) return;
+
+	MANDEL_GENERIC(dst, *ctx, *par, ix, iy, idx);
+}
+
+// Unused in GPU mode
+__attribute__((unused))
+static inline void mandel_cpu(int16_t *dst, const Mandel *ctx, const Params *par) {
+	int idx = 0;
+	for (int iy = 0; iy < ctx->h; iy++) {
+		for (int ix = 0; ix < ctx->w; ix++) {
+			MANDEL_GENERIC(dst, *ctx, *par, ix, iy, idx);
+			idx++;
+		}
+	}
+}
+
+static void hue2rgb(int hue, uint8_t *r, uint8_t *g, uint8_t *b) {
+	const int X = (60 - abs(hue % 120 - 60)) * 255 / 60;
+	const int C = 255;
+	switch (hue / 60 % 6) {
+		case 0: *r = C; *g = X; *b = 0; break;
+		case 1: *r = X; *g = C; *b = 0; break;
+		case 2: *r = 0; *g = C; *b = X; break;
+		case 3: *r = 0; *g = X; *b = C; break;
+		case 4: *r = X; *g = 0; *b = C; break;
+		case 5: *r = C; *g = 0; *b = X; break;
+	}
+}
+
+static void curve(int16_t iter, uint8_t *red, uint8_t *gre, uint8_t *blu, int16_t maxit) {
+#if 0
+	double x = (double)iter / maxit;
+	*red = sqrt(x) * 255;
+	*gre = (-x*x*x + x*x*3/2 + x/2) * 255;
+	*blu = x*x * 255;
+#else
+	hue2rgb(iter, red, gre, blu);
+#endif
+}
+
+static int64_t gettimestamp() {
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+double mandel_imgh(const Mandel *ctx, const Params *par) {
+	return par->imgw * ctx->h / ctx->w;
+}
+
+void mandel_render(uint8_t *dst, Mandel *ctx, const Params *par) {
+	ctx->imgh = mandel_imgh(ctx, par);
+
+	int64_t t1 = gettimestamp();
+
+#ifdef USE_GPU
+	CUDA_CHECK(cudaMemcpy(ctx->devPar, par, sizeof(Params), cudaMemcpyHostToDevice));
+	CUDA_CHECK(cudaMemcpy(ctx->devCtx, ctx, sizeof(Mandel), cudaMemcpyHostToDevice));
+	const int nblocks = (ctx->w * ctx->h + 1023) / 1024;
+	mandel_gpu<<<nblocks, 1024>>>(ctx->devImg, ctx->devCtx, ctx->devPar);
+
+	CUDA_CHECK(cudaMemcpy(ctx->img, ctx->devImg, ctx->w * ctx->h * sizeof(int16_t), cudaMemcpyDeviceToHost));
+#else
+	mandel_cpu(ctx->img, ctx, par);
+#endif
+
+	int64_t t2 = gettimestamp();
+
+	for (int i = 0; i < ctx->w * ctx->h; i++) {
+		if (ctx->img[i] == par->maxit) {
+			dst[3*i] = dst[3*i+1] = dst[3*i+2] = 0;
+		} else {
+			curve(ctx->img[i], &dst[3*i], &dst[3*i+1], &dst[3*i+2], par->maxit);
+		}
+	}
+
+	int64_t t3 = gettimestamp();
+
+	cout << "gpu part: " << (t2 - t1) / 1000000.0 << " sec   "
+	     << "cpu part: " << (t3 - t2) / 1000000 << " sec" << endl;
+}
+
+#if 0
+int main() {
+	Mandel *ctx = mandel_init(1920, 1080);
+	Params par = mandel_default_params();
+	par.cy = 0.5;
+	uint8_t *img = new uint8_t[3 * 1920 * 1080];
+	mandel_render(img, ctx, &par);
+	mandel_free(ctx);
+	bmp_rgb_encode_file("out.bmp", img, 1920, 1080);
+}
+#endif