// mandel.cu -- Mandelbrot renderer with a CUDA kernel and a CPU fallback.
//
// Origin: commit 08c6f6ea50aeadb3fae94d08b76556b5e2c88543 ("Initial"),
// Tom Smeding, Sat, 31 Mar 2018 09:57:57 +0200.
//
// NOTE(review): this text was recovered from a whitespace-collapsed patch
// listing in which everything between angle brackets had been stripped. The
// four system includes below and the kernel launch configuration in
// mandel_render() were reconstructed from how the code uses them -- confirm
// against the repository.

#include <iostream>
#include <cstdlib>
#include <cstdint>
#include <sys/time.h>
#include "mandel.h"
#include "lodepng.h"
#include "bmp.h"

using namespace std;

// Comment this out to render with mandel_cpu() instead of the CUDA kernel.
#define USE_GPU

// Evaluates a CUDA runtime call; on failure prints the error code, its
// description and the failing expression with its location, then exits with
// status 1. Wrapped in do/while(0) so it behaves as a single statement.
#define CUDA_CHECK(expr) do { \
		cudaError_t err_ = (expr); \
		if (err_ != cudaSuccess) { \
			cerr << "Cuda error: " << err_ << ": " << cudaGetErrorString(err_) << endl; \
			cerr << "On " << __FILE__ << ":" << __LINE__ << ": " << #expr << endl; \
			exit(1); \
		} \
	} while (0)

// Threads per block for the mandel_gpu launch; the block count is derived
// from it with a ceiling division.
static const int kThreadsPerBlock = 1024;

// Rendering context: image dimensions plus host and device buffers.
struct Mandel {
	int w, h;          // image size in pixels
	int16_t *img;      // host buffer of w*h iteration counts
	int16_t *devImg;   // device buffer of w*h iteration counts
	Params *devPar;    // device copy of the render parameters
	Mandel *devCtx;    // device copy of this very struct (the kernel reads w, h, imgh from it)

	double imgh;  // transient; calculated from params each time
};

// Returns the default view: centre (-0.5, 0), width 3.5, 512 iterations.
Params mandel_default_params() {
	Params par;
	par.cx = -0.5; par.cy = 0.0;
	par.imgw = 3.5;
	par.maxit = 512;
	return par;
}

// Allocates a context for a w-by-h image, including the device buffers when
// USE_GPU is defined. Release it with mandel_free().
Mandel* mandel_init(int w, int h) {
	// Multiply in size_t so the buffer size cannot overflow int.
	const size_t npixels = (size_t)w * (size_t)h;

	Mandel *ctx = new Mandel;
	ctx->w = w;
	ctx->h = h;
	ctx->imgh = 0.0;
	ctx->img = new int16_t[npixels];
	// Keep the device pointers well-defined even in CPU mode.
	ctx->devImg = NULL;
	ctx->devPar = NULL;
	ctx->devCtx = NULL;
#ifdef USE_GPU
	CUDA_CHECK(cudaMalloc(&ctx->devImg, npixels * sizeof(int16_t)));
	CUDA_CHECK(cudaMalloc(&ctx->devPar, sizeof(Params)));
	CUDA_CHECK(cudaMalloc(&ctx->devCtx, sizeof(Mandel)));
#endif
	return ctx;
}

// Releases everything owned by a context created with mandel_init().
// Passing NULL is a no-op.
void mandel_free(Mandel *ctx) {
	if (ctx == NULL) return;
	delete[] ctx->img;
#ifdef USE_GPU
	CUDA_CHECK(cudaFree(ctx->devImg));
	CUDA_CHECK(cudaFree(ctx->devPar));
	CUDA_CHECK(cudaFree(ctx->devCtx));
#endif
	delete ctx;
}

// Shared per-pixel escape-time iteration, used by both the GPU kernel and the
// CPU loop. Maps pixel (ix, iy) linearly onto the rectangle of size
// imgw x imgh centred on (cx, cy), iterates z <- z^2 + c starting from z = c
// until |z|^2 >= 4 or maxit iterations have run, and stores the iteration
// count in dst[idx]. Declares the locals x, y, a, b, a2, b2, iter and maxiter,
// so the arguments must not use those names.
#define MANDEL_GENERIC(dst, ctx, par, ix, iy, idx) do { \
		const double x = (par).cx - (par).imgw / 2 + (par).imgw * (ix) / ((ctx).w-1); \
		const double y = (par).cy - (ctx).imgh / 2 + (ctx).imgh * (iy) / ((ctx).h-1); \
		double a = x, b = y, a2 = a * a, b2 = b * b; \
		int16_t iter, maxiter = (par).maxit; \
		for (iter = 0; iter < maxiter && a2 + b2 < 4; iter++) { \
			b = 2 * a * b + y; a = a2 - b2 + x; \
			a2 = a * a; b2 = b * b; \
		} \
		(dst)[idx] = iter; \
	} while (0)

// One thread per pixel on a 1D grid of 1D blocks. dst, ctx and par are
// dereferenced inside the kernel and so must be device pointers; dst needs
// room for ctx->w * ctx->h entries. Output row 0 corresponds to the largest y.
__global__ void mandel_gpu(int16_t *dst, const Mandel *ctx, const Params *par) {
	const int idx = blockDim.x * blockIdx.x + threadIdx.x;
	const int w = ctx->w, h = ctx->h;
	// The grid is rounded up to whole blocks, so the last block may contain
	// threads past the final pixel. Test the flat index itself: a check on iy
	// cannot catch them, because iy becomes negative for such threads.
	if (idx >= w * h) return;
	const int ix = idx % w, iy = h - 1 - idx / w;

	MANDEL_GENERIC(dst, *ctx, *par, ix, iy, idx);
}

// Unused in GPU mode
// Sequential fallback that fills dst (w*h entries, host memory) row by row.
__attribute__((unused))
static inline void mandel_cpu(int16_t *dst, const Mandel *ctx, const Params *par) {
	int idx = 0;
	for (int row = 0; row < ctx->h; row++) {
		// Same vertical orientation as mandel_gpu: output row 0 is the largest y.
		const int iy = ctx->h - 1 - row;
		for (int ix = 0; ix < ctx->w; ix++) {
			MANDEL_GENERIC(dst, *ctx, *par, ix, iy, idx);
			idx++;
		}
	}
}

// Converts a hue in degrees (wrapping every 360) to a fully saturated,
// full-brightness RGB triple.
static void hue2rgb(int hue, uint8_t *r, uint8_t *g, uint8_t *b) {
	const int X = (60 - abs(hue % 120 - 60)) * 255 / 60;
	const int C = 255;
	switch (hue / 60 % 6) {
		case 0: *r = C; *g = X; *b = 0; break;
		case 1: *r = X; *g = C; *b = 0; break;
		case 2: *r = 0; *g = C; *b = X; break;
		case 3: *r = 0; *g = X; *b = C; break;
		case 4: *r = X; *g = 0; *b = C; break;
		case 5: *r = C; *g = 0; *b = X; break;
		// Only reachable for a negative hue; never leave the outputs unwritten.
		default: *r = 0; *g = 0; *b = 0; break;
	}
}

// Maps an iteration count to a colour. The active variant uses the count
// directly as a hue and ignores maxit.
static void curve(int16_t iter, uint8_t *red, uint8_t *gre, uint8_t *blu, int16_t maxit) {
#if 0
	double x = (double)iter / maxit;
	*red = sqrt(x) * 255;
	*gre = (-x*x*x + x*x*3/2 + x/2) * 255;
	*blu = x*x * 255;
#else
	(void)maxit;
	hue2rgb(iter, red, gre, blu);
#endif
}

// Current time of day in microseconds.
static int64_t gettimestamp() {
	struct timeval tv;
	gettimeofday(&tv, NULL);
	return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}

// Height of the viewed rectangle: the width scaled by the pixel aspect ratio.
double mandel_imgh(const Mandel *ctx, const Params *par) {
	return par->imgw * ctx->h / ctx->w;
}

// Renders the view described by par into dst as packed 8-bit RGB
// (3 * w * h bytes). Pixels that reached maxit are black; all others are
// coloured by curve(). Prints the time spent in each of the two phases.
void mandel_render(uint8_t *dst, Mandel *ctx, const Params *par) {
	ctx->imgh = mandel_imgh(ctx, par);
	const int npixels = ctx->w * ctx->h;

	int64_t t1 = gettimestamp();

#ifdef USE_GPU
	CUDA_CHECK(cudaMemcpy(ctx->devPar, par, sizeof(Params), cudaMemcpyHostToDevice));
	CUDA_CHECK(cudaMemcpy(ctx->devCtx, ctx, sizeof(Mandel), cudaMemcpyHostToDevice));
	// A zero-block launch is an invalid configuration, so skip empty images.
	if (npixels > 0) {
		const int nblocks = (npixels + kThreadsPerBlock - 1) / kThreadsPerBlock;
		mandel_gpu<<<nblocks, kThreadsPerBlock>>>(ctx->devImg, ctx->devCtx, ctx->devPar);
		// Launches return nothing; a bad configuration only shows up here.
		CUDA_CHECK(cudaGetLastError());
	}

	// Blocking copy; it is checked, so a fault inside the kernel is reported here.
	CUDA_CHECK(cudaMemcpy(ctx->img, ctx->devImg, npixels * sizeof(int16_t), cudaMemcpyDeviceToHost));
#else
	mandel_cpu(ctx->img, ctx, par);
#endif

	int64_t t2 = gettimestamp();

	for (int i = 0; i < npixels; i++) {
		if (ctx->img[i] == par->maxit) {
			dst[3*i] = dst[3*i+1] = dst[3*i+2] = 0;
		} else {
			curve(ctx->img[i], &dst[3*i], &dst[3*i+1], &dst[3*i+2], par->maxit);
		}
	}

	int64_t t3 = gettimestamp();

	// Divide by a double in both terms; integer division would truncate the
	// second figure to whole seconds.
	cout << "gpu part: " << (t2 - t1) / 1000000.0 << " sec  "
	     << "cpu part: " << (t3 - t2) / 1000000.0 << " sec" << endl;
}

#if 0
int main() {
	Mandel *ctx = mandel_init(1920, 1080);
	Params par = mandel_default_params();
	par.cy = 0.5;
	uint8_t *img = new uint8_t[3 * 1920 * 1080];
	mandel_render(img, ctx, &par);
	mandel_free(ctx);
	bmp_rgb_encode_file("out.bmp", img, 1920, 1080);
}
#endif