aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/src
diff options
context:
space:
mode:
authorJP Appel <jeanpierre.appel01@gmail.com>2024-04-26 01:07:10 -0400
committerJP Appel <jeanpierre.appel01@gmail.com>2024-04-26 01:07:10 -0400
commit033863ab3d37574441183f65664d0c72ab2707be (patch)
tree0efc429873fdf8b6aeff5d78d711cf8702892e96 /src
parenta3862f8d92d9476eab29b0c4f74e1f1e8de92520 (diff)
first pass at cuda implementation
Diffstat (limited to 'src')
-rw-r--r--src/cuda-fractals.cu306
-rw-r--r--src/fractals.h29
-rw-r--r--src/grids.h5
-rw-r--r--src/precision.h1
4 files changed, 329 insertions, 12 deletions
diff --git a/src/cuda-fractals.cu b/src/cuda-fractals.cu
new file mode 100644
index 0000000..92be53f
--- /dev/null
+++ b/src/cuda-fractals.cu
@@ -0,0 +1,306 @@
+#include <cuda_runtime.h>
+#include <thrust/complex.h>
+#include "fractals.h"
+
+//These won't work/ need to be modifiied for the cuda version
+//#include "grids.h"
+//#include "fractals.h"
+
+/*
+ * Macro for checking CUDA errors
+ */
+#define CHECK(call) \
+{ \
+ const cudaError_t error = call; \
+ if(error != cudaSuccess){ \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "Code: %d, Reason: %s\n", error, cudaGetErrorString(error)); \
+ } \
+}
+
+#ifndef CBASE
+#define CBASE double
+#endif
+
+#define BLOCK_SIZE_X 16
+#define BLOCK_SIZE_Y 16
+
+// this ain't pretty, but this logic should never change for kernels
+#define SET_ROW_COL \
+ const size_t row = blockIdx.y * blockDim.y + threadIdx.y; \
+ const size_t col = blockIdx.x * blockDim.x + threadIdx.x; \
+ if(row >= rows || col >= cols) return
+
+
+__device__
+thrust::complex<CBASE> grid_to_complex(const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t row, const size_t col, const size_t rows, const size_t cols){
+ const CBASE x_min = lower_left.real();
+ const CBASE x_max = upper_right.real();
+ const CBASE y_min = lower_left.imag();
+ const CBASE y_max = upper_right.imag();
+
+ const CBASE x_step = (x_max - x_min) / (CBASE)cols;
+ const CBASE y_step = (y_max - y_min) / (CBASE)rows;
+
+ const CBASE x = x_min + col * x_step;
+ const CBASE y = y_min + row * y_step;
+ const thrust::complex<CBASE> z(x,y);
+ return z;
+}
+
+__device__
+size_t mandelbrot(const thrust::complex<CBASE> z0, const size_t max_iterations){
+ thrust::complex<CBASE> z = z0;
+ size_t iteration = 0;
+ while(thrust::abs(z) <= 2 && iteration < max_iterations){
+ z = z*z + z0;
+ iteration++;
+ }
+ return iteration;
+}
+
+__global__
+void mandelbrot_kernel(size_t* grid_data, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
+ SET_ROW_COL;
+
+ const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
+
+ grid_data[row*cols + col] = mandelbrot(z, max_iterations);
+}
+
+__device__
+size_t tricorn(const thrust::complex<CBASE> z0, const size_t max_iterations){
+ thrust::complex<CBASE> z = z0;
+ size_t iteration = 0;
+ while(thrust::abs(z) <= 2 && iteration < max_iterations){
+ z = thrust::conj(z*z) + z0;
+ iteration++;
+ }
+ return iteration;
+}
+
+__global__
+void tricorn_kernel(size_t* grid_data, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
+ SET_ROW_COL;
+
+ const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
+
+ grid_data[row*cols + col] = tricorn(z, max_iterations);
+}
+
+__device__
+size_t burning_ship(const thrust::complex<CBASE> z0, const size_t max_iterations){
+ thrust::complex<CBASE> z = z0;
+ thrust::complex<CBASE> z_mod;
+ size_t iteration = 0;
+ while(thrust::abs(z) <= 2 && iteration < max_iterations){
+ z_mod = thrust::complex<CBASE>(thrust::abs(z.real()), thrust::abs(z.imag()));
+ z = z_mod * zmod + z0;
+ iteration++;
+ }
+ return iteration;
+}
+
+__global__
+void burning_ship_kernel(size_t* grid_data, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
+ SET_ROW_COL;
+
+ const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
+
+ grid_data[row*cols + col] = burning_ship(z, max_iterations);
+}
+
+__device__
+size_t multibrot(const thrust::complex<CBASE> z0, const size_t max_iterations, const double d){
+ thrust::complex<CBASE> z = z0;
+ size_t iteration = 0;
+ while(thrust::abs(z) <= 2 && iteration < max_iterations){
+ z = thrust::pow(z, d) + z0;
+ iteration++;
+ }
+ return iteration;
+}
+
+__global__
+void multibrot_kernel(size_t* grid_data, const double degree, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
+ SET_ROW_COL;
+
+ const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
+
+ grid_data[row*cols + col] = multibrot(z, max_iterations, degree);
+}
+
+__device__
+size_t multicorn(const thrust::complex<CBASE> z0, const size_t max_iterations, const double d){
+ thrust::complex<CBASE> z = z0;
+ size_t iteration = 0;
+ while(thrust::abs(z) <= 2 && iteration < max_iterations){
+ z = thrust::conj(thrust::pow(z, d)) + z0;
+ iteration++;
+ }
+ return iteration;
+}
+
+__global__
+void multicorn_kernel(size_t* grid_data, const double degree, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
+ SET_ROW_COL;
+
+ const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
+
+ grid_data[row*cols + col] = multicorn(z, max_iterations, degree);
+}
+
+__device__
+size_t julia(const thrust::complex<CBASE> z0, const size_t max_iterations, const thrust::complex<CBASE> c, const double R){
+ thrust::complex<CBASE> z = z0;
+ size_t iteration = 0;
+ while(thrust::abs(z) <= R && iteration < max_iterations){
+ z = z*z + c;
+ iteration++;
+ }
+ return iteration;
+}
+
+__global__
+void julia_kernel(size_t* grid_data, const thrust:complex<CBASE> constant, const double radius, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
+ SET_ROW_COL;
+
+ const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
+
+ grid_data[row*cols + col] = julia(z, max_iterations, constant, radius);
+}
+
+extern "C" {
+void mandelbrot_grid(grid_t* grid, const grid_gen_params* params){
+ const size_t size = grid->size;
+ const size_t rows = grid->y;
+ const size_t cols = grid->x;
+ const size_t max_iterations = grid->max_iterations;
+ thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
+ thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);
+
+ size_t* d_grid_data;
+ CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t)));
+ //TODO: find good sizes
+ dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+ //dim3 grid_size(0,0);
+ dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);
+ mandelbrot_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols);
+ CHECK(cudaDeviceSynchronize());
+
+ CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost));
+
+ CHECK(cudaFree(d_grid_data));
+ CHECK(cudaDeviceReset());
+}
+
+void tricorn_grid(grid_t* grid, const grid_gen_params* params){
+ const size_t size = grid->size;
+ const size_t rows = grid->y;
+ const size_t cols = grid->x;
+ const size_t max_iterations = grid->max_iterations;
+ thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
+ thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);
+
+ size_t* d_grid_data;
+ CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t)));
+ dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+ dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);
+ tricorn_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols);
+ CHECK(cudaDeviceSynchronize());
+
+ CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost));
+
+ CHECK(cudaFree(d_grid_data));
+ CHECK(cudaDeviceReset());
+}
+
+void burning_ship_grid(grid_t* grid, const grid_gen_params* params){
+ const size_t size = grid->size;
+ const size_t rows = grid->y;
+ const size_t cols = grid->x;
+ const size_t max_iterations = grid->max_iterations;
+ thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
+ thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);
+
+ size_t* d_grid_data;
+ CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t)));
+ dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+ dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);
+ burning_ship_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols);
+ CHECK(cudaDeviceSynchronize());
+
+ CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost));
+
+ CHECK(cudaFree(d_grid_data));
+ CHECK(cudaDeviceReset());
+}
+
+void multibrot_grid(grid_t* grid, const grid_gen_params* params){
+ const size_t size = grid->size;
+ const size_t rows = grid->y;
+ const size_t cols = grid->x;
+ const size_t max_iterations = grid->max_iterations;
+ const double degree = params->degree;
+ thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
+ thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);
+
+ size_t* d_grid_data;
+ CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t)));
+ dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+ dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);
+ multibrot_kernel<<<grid_size, block_size>>>(d_grid_data, degree, max_iterations, lower_left, upper_right, rows, cols);
+ CHECK(cudaDeviceSynchronize());
+
+ CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost));
+
+ CHECK(cudaFree(d_grid_data));
+ CHECK(cudaDeviceReset());
+}
+
+void multicorn_grid(grid_t* grid, const grid_gen_params* params){
+ const size_t size = grid->size;
+ const size_t rows = grid->y;
+ const size_t cols = grid->x;
+ const size_t max_iterations = grid->max_iterations;
+ const double degree = params->degree;
+ thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
+ thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);
+
+ size_t* d_grid_data;
+ CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t)));
+ dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+ dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);
+ multicorn_kernel<<<grid_size, block_size>>>(d_grid_data, degree, max_iterations, lower_left, upper_right, rows, cols);
+ CHECK(cudaDeviceSynchronize());
+
+ CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost));
+
+ CHECK(cudaFree(d_grid_data));
+ CHECK(cudaDeviceReset());
+}
+
+void julia_grid(grid_t* grid, const grid_gen_params* params){
+ const complex_t constant = params->cr.constant;
+ const double radius = params->cr.radius;
+ const size_t size = grid->size;
+ const size_t rows = grid->y;
+ const size_t cols = grid->x;
+ const size_t max_iterations = grid->max_iterations;
+ const double degree = params->degree;
+ thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
+ thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);
+
+ size_t* d_grid_data;
+ CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t)));
+ dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+ dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);
+ multicorn_kernel<<<grid_size, block_size>>>(d_grid_data, constant, radius, max_iterations, lower_left, upper_right, rows, cols);
+ CHECK(cudaDeviceSynchronize());
+
+ CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost));
+
+ CHECK(cudaFree(d_grid_data));
+ CHECK(cudaDeviceReset());
+}
+}
diff --git a/src/fractals.h b/src/fractals.h
index 611e4e6..1c745d9 100644
--- a/src/fractals.h
+++ b/src/fractals.h
@@ -1,8 +1,9 @@
#pragma once
+#ifndef __NVCC__
#include <complex.h>
-#include <stddef.h>
-#include <stdint.h>
+#endif
+
#include "grids.h"
#include "precision.h"
@@ -16,20 +17,24 @@ typedef union {
typedef void (*fractal_generator)(grid_t* , const grid_gen_params* );
+#ifndef __NVCC__
size_t mandelbrot(const CBASE complex z0, const size_t max_iterations);
-void mandelbrot_grid(grid_t* grid, const grid_gen_params* params);
-
size_t tricorn(const CBASE complex z0, const size_t max_iterations);
-void tricorn_grid(grid_t* grid, const grid_gen_params* params);
-
size_t burning_ship(const CBASE complex z0, const size_t max_iterations);
-void burning_ship_grid(grid_t* grid, const grid_gen_params* params);
-
size_t multibrot(const CBASE complex z0, const size_t max_iterations, const double d);
-void multibrot_grid(grid_t* grid, const grid_gen_params* params);
-
size_t multicorn(const CBASE complex z0, const size_t max_iterations, const double d);
-void multicorn_grid(grid_t* grid, const grid_gen_params* params);
-
size_t julia(const CBASE complex z0, const CBASE complex c, const size_t max_iterations, const double R);
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void mandelbrot_grid(grid_t* grid, const grid_gen_params* params);
+void tricorn_grid(grid_t* grid, const grid_gen_params* params);
+void burning_ship_grid(grid_t* grid, const grid_gen_params* params);
+void multibrot_grid(grid_t* grid, const grid_gen_params* params);
+void multicorn_grid(grid_t* grid, const grid_gen_params* params);
void julia_grid(grid_t* grid, const grid_gen_params* params);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/grids.h b/src/grids.h
index 942bb7f..b823c91 100644
--- a/src/grids.h
+++ b/src/grids.h
@@ -3,7 +3,9 @@
#include <stddef.h>
#include <stdio.h>
#include <stdbool.h>
+#ifndef __NVCC__
#include <complex.h>
+#endif
#include "precision.h"
//grid write errors
@@ -35,7 +37,10 @@ void free_grid(grid_t* grid);
bool grid_equal(const grid_t* grid1, const grid_t* grid2);
bool grid_allclose(const grid_t* grid1, const grid_t* grid2, const size_t max_error);
+#ifndef __NVCC__
CBASE complex grid_to_complex(const grid_t* grid, const size_t index);
+#endif
+
void zoom_grid(grid_t* grid, const CBASE magnification);
void print_grid_info(const grid_t* grid);
diff --git a/src/precision.h b/src/precision.h
index 4bca354..67e3a13 100644
--- a/src/precision.h
+++ b/src/precision.h
@@ -7,6 +7,7 @@
#ifdef EXTENDED_PRECISION
+#warning Compiling with extended precision, will lead to code incompatibility
#define CBASE long double
#define CREAL creall
#define CIMAG cimagl