diff options
| -rw-r--r-- | src/cuda-fractals.cu | 306 | ||||
| -rw-r--r-- | src/fractals.h | 29 | ||||
| -rw-r--r-- | src/grids.h | 5 | ||||
| -rw-r--r-- | src/precision.h | 1 |
4 files changed, 329 insertions, 12 deletions
diff --git a/src/cuda-fractals.cu b/src/cuda-fractals.cu new file mode 100644 index 0000000..92be53f --- /dev/null +++ b/src/cuda-fractals.cu @@ -0,0 +1,306 @@ +#include <cuda_runtime.h> +#include <thrust/complex.h> +#include "fractals.h" + +//These won't work/ need to be modifiied for the cuda version +//#include "grids.h" +//#include "fractals.h" + +/* + * Macro for checking CUDA errors + */ +#define CHECK(call) \ +{ \ + const cudaError_t error = call; \ + if(error != cudaSuccess){ \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "Code: %d, Reason: %s\n", error, cudaGetErrorString(error)); \ + } \ +} + +#ifndef CBASE +#define CBASE double +#endif + +#define BLOCK_SIZE_X 16 +#define BLOCK_SIZE_Y 16 + +// this ain't pretty, but this logic should never change for kernels +#define SET_ROW_COL \ + const size_t row = blockIdx.y * blockDim.y + threadIdx.y; \ + const size_t col = blockIdx.x * blockDim.x + threadIdx.x; \ + if(row >= rows || col >= cols) return + + +__device__ +thrust::complex<CBASE> grid_to_complex(const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t row, const size_t col, const size_t rows, const size_t cols){ + const CBASE x_min = lower_left.real(); + const CBASE x_max = upper_right.real(); + const CBASE y_min = lower_left.imag(); + const CBASE y_max = upper_right.imag(); + + const CBASE x_step = (x_max - x_min) / (CBASE)cols; + const CBASE y_step = (y_max - y_min) / (CBASE)rows; + + const CBASE x = x_min + col * x_step; + const CBASE y = y_min + row * y_step; + const thrust::complex<CBASE> z(x,y); + return z; +} + +__device__ +size_t mandelbrot(const thrust::complex<CBASE> z0, const size_t max_iterations){ + thrust::complex<CBASE> z = z0; + size_t iteration = 0; + while(thrust::abs(z) <= 2 && iteration < max_iterations){ + z = z*z + z0; + iteration++; + } + return iteration; +} + +__global__ +void mandelbrot_kernel(size_t* grid_data, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){ + SET_ROW_COL; + + const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols); + + grid_data[row*cols + col] = mandelbrot(z, max_iterations); +} + +__device__ +size_t tricorn(const thrust::complex<CBASE> z0, const size_t max_iterations){ + thrust::complex<CBASE> z = z0; + size_t iteration = 0; + while(thrust::abs(z) <= 2 && iteration < max_iterations){ + z = thrust::conj(z*z) + z0; + iteration++; + } + return iteration; +} + +__global__ +void tricorn_kernel(size_t* grid_data, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){ + SET_ROW_COL; + + const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols); + + grid_data[row*cols + col] = tricorn(z, max_iterations); +} + +__device__ +size_t burning_ship(const thrust::complex<CBASE> z0, const size_t max_iterations){ + thrust::complex<CBASE> z = z0; + thrust::complex<CBASE> z_mod; + size_t iteration = 0; + while(thrust::abs(z) <= 2 && iteration < max_iterations){ + z_mod = thrust::complex<CBASE>(thrust::abs(z.real()), thrust::abs(z.imag())); + z = z_mod * zmod + z0; + iteration++; + } + return iteration; +} + +__global__ +void burning_ship_kernel(size_t* grid_data, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){ + SET_ROW_COL; + + const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols); + + grid_data[row*cols + col] = burning_ship(z, max_iterations); +} + +__device__ +size_t multibrot(const thrust::complex<CBASE> z0, const size_t max_iterations, const double d){ + thrust::complex<CBASE> z = z0; + size_t iteration = 0; + while(thrust::abs(z) <= 2 && iteration < max_iterations){ + z = thrust::pow(z, d) + z0; + iteration++; + } + return iteration; +} + +__global__ +void multibrot_kernel(size_t* grid_data, const double degree, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){ + SET_ROW_COL; + + const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols); + + grid_data[row*cols + col] = multibrot(z, max_iterations, degree); +} + +__device__ +size_t multicorn(const thrust::complex<CBASE> z0, const size_t max_iterations, const double d){ + thrust::complex<CBASE> z = z0; + size_t iteration = 0; + while(thrust::abs(z) <= 2 && iteration < max_iterations){ + z = thrust::conj(thrust::pow(z, d)) + z0; + iteration++; + } + return iteration; +} + +__global__ +void multicorn_kernel(size_t* grid_data, const double degree, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){ + SET_ROW_COL; + + const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols); + + grid_data[row*cols + col] = multicorn(z, max_iterations, degree); +} + +__device__ +size_t julia(const thrust::complex<CBASE> z0, const size_t max_iterations, const thrust::complex<CBASE> c, const double R){ + thrust::complex<CBASE> z = z0; + size_t iteration = 0; + while(thrust::abs(z) <= R && iteration < max_iterations){ + z = z*z + c; + iteration++; + } + return iteration; +} + +__global__ +void julia_kernel(size_t* grid_data, const thrust:complex<CBASE> constant, const double radius, const size_t max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){ + SET_ROW_COL; + + const auto z = grid_to_complex(lower_left, upper_right, row, col, rows, cols); + + grid_data[row*cols + col] = julia(z, max_iterations, constant, radius); +} + +extern "C" { +void mandelbrot_grid(grid_t* grid, const grid_gen_params* params){ + const size_t size = grid->size; + const size_t rows = grid->y; + const size_t cols = grid->x; + const size_t max_iterations = grid->max_iterations; + thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im); + thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im); + + size_t* d_grid_data; + CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t))); + //TODO: find good sizes + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + //dim3 grid_size(0,0); + dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y); + mandelbrot_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols); + CHECK(cudaDeviceSynchronize()); + + CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost)); + + CHECK(cudaFree(d_grid_data)); + CHECK(cudaDeviceReset()); +} + +void tricorn_grid(grid_t* grid, const grid_gen_params* params){ + const size_t size = grid->size; + const size_t rows = grid->y; + const size_t cols = grid->x; + const size_t max_iterations = grid->max_iterations; + thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im); + thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im); + + size_t* d_grid_data; + CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t))); + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y); + tricorn_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols); + CHECK(cudaDeviceSynchronize()); + + CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost)); + + CHECK(cudaFree(d_grid_data)); + CHECK(cudaDeviceReset()); +} + +void burning_ship_grid(grid_t* grid, const grid_gen_params* params){ + const size_t size = grid->size; + const size_t rows = grid->y; + const size_t cols = grid->x; + const size_t max_iterations = grid->max_iterations; + thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im); + thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im); + + size_t* d_grid_data; + CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t))); + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y); + burning_ship_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols); + CHECK(cudaDeviceSynchronize()); + + CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost)); + + CHECK(cudaFree(d_grid_data)); + CHECK(cudaDeviceReset()); +} + +void multibrot_grid(grid_t* grid, const grid_gen_params* params){ + const size_t size = grid->size; + const size_t rows = grid->y; + const size_t cols = grid->x; + const size_t max_iterations = grid->max_iterations; + const double degree = params->degree; + thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im); + thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im); + + size_t* d_grid_data; + CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t))); + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y); + multibrot_kernel<<<grid_size, block_size>>>(d_grid_data, degree, max_iterations, lower_left, upper_right, rows, cols); + CHECK(cudaDeviceSynchronize()); + + CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost)); + + CHECK(cudaFree(d_grid_data)); + CHECK(cudaDeviceReset()); +} + +void multicorn_grid(grid_t* grid, const grid_gen_params* params){ + const size_t size = grid->size; + const size_t rows = grid->y; + const size_t cols = grid->x; + const size_t max_iterations = grid->max_iterations; + const double degree = params->degree; + thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im); + thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im); + + size_t* d_grid_data; + CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t))); + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y); + multicorn_kernel<<<grid_size, block_size>>>(d_grid_data, degree, max_iterations, lower_left, upper_right, rows, cols); + CHECK(cudaDeviceSynchronize()); + + CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost)); + + CHECK(cudaFree(d_grid_data)); + CHECK(cudaDeviceReset()); +} + +void julia_grid(grid_t* grid, const grid_gen_params* params){ + const complex_t constant = params->cr.constant; + const double radius = params->cr.radius; + const size_t size = grid->size; + const size_t rows = grid->y; + const size_t cols = grid->x; + const size_t max_iterations = grid->max_iterations; + const double degree = params->degree; + thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im); + thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im); + + size_t* d_grid_data; + CHECK(cudaMalloc(&d_grid_data, size*sizeof(size_t))); + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y); + multicorn_kernel<<<grid_size, block_size>>>(d_grid_data, constant, radius, max_iterations, lower_left, upper_right, rows, cols); + CHECK(cudaDeviceSynchronize()); + + CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(size_t), cudaMemcpyDeviceToHost)); + + CHECK(cudaFree(d_grid_data)); + CHECK(cudaDeviceReset()); +} +} diff --git a/src/fractals.h b/src/fractals.h index 611e4e6..1c745d9 100644 --- a/src/fractals.h +++ b/src/fractals.h @@ -1,8 +1,9 @@ #pragma once +#ifndef __NVCC__ #include <complex.h> -#include <stddef.h> -#include <stdint.h> +#endif + #include "grids.h" #include "precision.h" @@ -16,20 +17,24 @@ typedef union { typedef void (*fractal_generator)(grid_t* , const grid_gen_params* ); +#ifndef __NVCC__ size_t mandelbrot(const CBASE complex z0, const size_t max_iterations); -void mandelbrot_grid(grid_t* grid, const grid_gen_params* params); - size_t tricorn(const CBASE complex z0, const size_t max_iterations); -void tricorn_grid(grid_t* grid, const grid_gen_params* params); - size_t burning_ship(const CBASE complex z0, const size_t max_iterations); -void burning_ship_grid(grid_t* grid, const grid_gen_params* params); - size_t multibrot(const CBASE complex z0, const size_t max_iterations, const double d); -void multibrot_grid(grid_t* grid, const grid_gen_params* params); - size_t multicorn(const CBASE complex z0, const size_t max_iterations, const double d); -void multicorn_grid(grid_t* grid, const grid_gen_params* params); - size_t julia(const CBASE complex z0, const CBASE complex c, const size_t max_iterations, const double R); +#endif + +#ifdef __cplusplus +extern "C" { +#endif +void mandelbrot_grid(grid_t* grid, const grid_gen_params* params); +void tricorn_grid(grid_t* grid, const grid_gen_params* params); +void burning_ship_grid(grid_t* grid, const grid_gen_params* params); +void multibrot_grid(grid_t* grid, const grid_gen_params* params); +void multicorn_grid(grid_t* grid, const grid_gen_params* params); void julia_grid(grid_t* grid, const grid_gen_params* params); +#ifdef __cplusplus +} +#endif diff --git a/src/grids.h b/src/grids.h index 942bb7f..b823c91 100644 --- a/src/grids.h +++ b/src/grids.h @@ -3,7 +3,9 @@ #include <stddef.h> #include <stdio.h> #include <stdbool.h> +#ifndef __NVCC__ #include <complex.h> +#endif #include "precision.h" //grid write errors @@ -35,7 +37,10 @@ void free_grid(grid_t* grid); bool grid_equal(const grid_t* grid1, const grid_t* grid2); bool grid_allclose(const grid_t* grid1, const grid_t* grid2, const size_t max_error); +#ifndef __NVCC__ CBASE complex grid_to_complex(const grid_t* grid, const size_t index); +#endif + void zoom_grid(grid_t* grid, const CBASE magnification); void print_grid_info(const grid_t* grid); diff --git a/src/precision.h b/src/precision.h index 4bca354..67e3a13 100644 --- a/src/precision.h +++ b/src/precision.h @@ -7,6 +7,7 @@ #ifdef EXTENDED_PRECISION +#warning Compiling with extended precision, will lead to code incompatibility #define CBASE long double #define CREAL creall #define CIMAG cimagl |
