aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/src/cuda-fractals.cu
blob: ef94a3f60fe4aec2314ef73ed0d8e549d5e3eba2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#include <cstdio>

#include <cuda_runtime.h>
#include <thrust/complex.h>

#include "fractals.h"

// These won't work / need to be modified for the CUDA version
//#include "grids.h"
//#include "fractals.h"

/*
 * Macro for checking CUDA errors.
 * Prints file/line, error code, and a human-readable reason on failure;
 * does NOT abort, so subsequent calls still run (best-effort behavior).
 * Wrapped in do { } while(0) so `CHECK(x);` is a single statement and
 * remains safe inside an unbraced if/else; `call` is parenthesized so
 * expressions with commas or operators expand correctly.
 */
#define CHECK(call) \
do { \
    const cudaError_t error = (call); \
    if(error != cudaSuccess){ \
        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
        fprintf(stderr, "Code: %d, Reason: %s\n", error, cudaGetErrorString(error)); \
    } \
} while(0)

// Base floating-point type for all complex arithmetic.
// Defaults to double; override at compile time (e.g. -DCBASE=float)
// to trade precision for speed on consumer GPUs.
#ifndef CBASE
#define CBASE double
#endif

// Thread-block shape: 16x16 = 256 threads, a multiple of the 32-thread warp.
#define BLOCK_SIZE_X 16
#define BLOCK_SIZE_Y 16

// Shared kernel prologue: derives this thread's (row, col) pixel from the
// 2D launch configuration and returns early for threads that fall outside
// the image (the grid is rounded up with a ceil-divide, so edge blocks
// overhang). Expects `rows` and `cols` (size_t) in scope at the expansion
// site; deliberately has no trailing semicolon so call sites write
// `SET_ROW_COL;`. Not pretty, but this logic should never change.
#define SET_ROW_COL \
    const size_t row = blockIdx.y * blockDim.y + threadIdx.y; \
    const size_t col = blockIdx.x * blockDim.x + threadIdx.x; \
    if(row >= rows || col >= cols) return


__device__
thrust::complex<CBASE> grid_to_complex(const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t row, const size_t col, const size_t rows, const size_t cols){
    // Map pixel (row, col) of a rows x cols image onto the rectangle of the
    // complex plane spanned by lower_left and upper_right. Column 0 / row 0
    // land exactly on the lower-left corner.
    const CBASE re_span = upper_right.real() - lower_left.real();
    const CBASE im_span = upper_right.imag() - lower_left.imag();
    const CBASE re = lower_left.real() + col * (re_span / (CBASE)cols);
    const CBASE im = lower_left.imag() + row * (im_span / (CBASE)rows);
    return thrust::complex<CBASE>(re, im);
}

__device__
byte mandelbrot(const thrust::complex<CBASE> z0, const byte max_iterations){
    // Escape-time iteration z -> z^2 + z0. Returns how many iterations ran
    // before |z| exceeded 2 (capped at max_iterations for interior points).
    thrust::complex<CBASE> z = z0;
    byte n = 0;
    for(; n < max_iterations && thrust::abs(z) <= 2; ++n){
        z = z*z + z0;
    }
    return n;
}

__global__
void mandelbrot_kernel(byte* grid_data, const byte max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
    // One thread per pixel on a 2D launch; out-of-range threads exit here.
    SET_ROW_COL;

    const thrust::complex<CBASE> z0 = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
    grid_data[row*cols + col] = mandelbrot(z0, max_iterations);
}

__device__ 
byte tricorn(const thrust::complex<CBASE> z0, const byte max_iterations){
    // Tricorn (Mandelbar) iteration z -> conj(z)^2 + z0; conj(z*z) is used
    // since conj(z)^2 == conj(z^2). Returns the escape-time count.
    thrust::complex<CBASE> z = z0;
    byte n = 0;
    for(; n < max_iterations && thrust::abs(z) <= 2; ++n){
        z = thrust::conj(z*z) + z0;
    }
    return n;
}

__global__
void tricorn_kernel(byte* grid_data, const byte max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
    // One thread per pixel on a 2D launch; out-of-range threads exit here.
    SET_ROW_COL;

    const thrust::complex<CBASE> z0 = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
    grid_data[row*cols + col] = tricorn(z0, max_iterations);
}

__device__
byte burning_ship(const thrust::complex<CBASE> z0, const byte max_iterations){
    // Burning-ship iteration: fold z into the first quadrant by taking the
    // absolute value of each component, then square and add z0.
    thrust::complex<CBASE> z = z0;
    byte n = 0;
    for(; n < max_iterations && thrust::abs(z) <= 2; ++n){
        const thrust::complex<CBASE> folded(fabs(z.real()), fabs(z.imag()));
        z = folded * folded + z0;
    }
    return n;
}

__global__
void burning_ship_kernel(byte* grid_data, const byte max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
    // One thread per pixel on a 2D launch; out-of-range threads exit here.
    SET_ROW_COL;

    const thrust::complex<CBASE> z0 = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
    grid_data[row*cols + col] = burning_ship(z0, max_iterations);
}

__device__
byte multibrot(const thrust::complex<CBASE> z0, const byte max_iterations, const double d){
    // Multibrot iteration z -> z^d + z0 for an arbitrary (possibly
    // non-integer) degree d; d == 2 reproduces the Mandelbrot set.
    thrust::complex<CBASE> z = z0;
    byte n = 0;
    for(; n < max_iterations && thrust::abs(z) <= 2; ++n){
        z = thrust::pow(z, d) + z0;
    }
    return n;
}

__global__
void multibrot_kernel(byte* grid_data, const double degree, const byte max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
    // One thread per pixel on a 2D launch; out-of-range threads exit here.
    SET_ROW_COL;

    const thrust::complex<CBASE> z0 = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
    grid_data[row*cols + col] = multibrot(z0, max_iterations, degree);
}

__device__
byte multicorn(const thrust::complex<CBASE> z0, const byte max_iterations, const double d){
    // Multicorn iteration z -> conj(z^d) + z0; d == 2 reproduces the
    // tricorn. Returns the escape-time count, capped at max_iterations.
    thrust::complex<CBASE> z = z0;
    byte n = 0;
    for(; n < max_iterations && thrust::abs(z) <= 2; ++n){
        z = thrust::conj(thrust::pow(z, d)) + z0;
    }
    return n;
}

__global__
void multicorn_kernel(byte* grid_data, const double degree, const byte max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
    // One thread per pixel on a 2D launch; out-of-range threads exit here.
    SET_ROW_COL;

    const thrust::complex<CBASE> z0 = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
    grid_data[row*cols + col] = multicorn(z0, max_iterations, degree);
}

__device__
byte julia(const thrust::complex<CBASE> z0, const byte max_iterations, const thrust::complex<CBASE> c, const double R){
    // Julia iteration z -> z^2 + c, starting from the pixel's own point z0
    // with a fixed constant c, escaping once |z| exceeds the radius R.
    thrust::complex<CBASE> z = z0;
    byte n = 0;
    for(; n < max_iterations && thrust::abs(z) <= R; ++n){
        z = z*z + c;
    }
    return n;
}

__global__
void julia_kernel(byte* grid_data, const thrust::complex<CBASE> constant, const double radius, const byte max_iterations, const thrust::complex<CBASE> lower_left, const thrust::complex<CBASE> upper_right, const size_t rows, const size_t cols){
    // One thread per pixel on a 2D launch; out-of-range threads exit here.
    SET_ROW_COL;

    const thrust::complex<CBASE> z0 = grid_to_complex(lower_left, upper_right, row, col, rows, cols);
    grid_data[row*cols + col] = julia(z0, max_iterations, constant, radius);
}

extern "C" {
void mandelbrot_grid(grid_t* grid, const grid_gen_params* params){
    /*
     * Host entry point: fills grid->data (rows*cols bytes, row-major) with
     * Mandelbrot escape-time counts over the region given by
     * grid->lower_left / grid->upper_right.
     */
    (void)params; // unused: the Mandelbrot set takes no extra parameters

    const size_t size = grid->size;
    const size_t rows = grid->y;
    const size_t cols = grid->x;
    const byte max_iterations = grid->max_iterations;
    thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
    thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);

    byte* d_grid_data;
    CHECK(cudaMalloc(&d_grid_data, size*sizeof(byte)));

    //TODO: find good sizes
    dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
    // ceil-divide so partial edge tiles are still covered
    dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);

    mandelbrot_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols);
    CHECK(cudaGetLastError());      // launch-configuration errors surface here
    CHECK(cudaDeviceSynchronize()); // asynchronous execution errors surface here

    CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(byte), cudaMemcpyDeviceToHost));
    CHECK(cudaFree(d_grid_data));
    // Deliberately no cudaDeviceReset(): it would destroy the entire CUDA
    // context, invalidating any device allocations the caller still holds
    // and forcing a full re-initialization on the next call.
}

void tricorn_grid(grid_t* grid, const grid_gen_params* params){
    /*
     * Host entry point: fills grid->data (rows*cols bytes, row-major) with
     * tricorn (Mandelbar) escape-time counts over the region given by
     * grid->lower_left / grid->upper_right.
     */
    (void)params; // unused: the tricorn takes no extra parameters

    const size_t size = grid->size;
    const size_t rows = grid->y;
    const size_t cols = grid->x;
    const byte max_iterations = grid->max_iterations;
    thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
    thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);

    byte* d_grid_data;
    CHECK(cudaMalloc(&d_grid_data, size*sizeof(byte)));

    dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
    // ceil-divide so partial edge tiles are still covered
    dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);

    tricorn_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols);
    CHECK(cudaGetLastError());      // launch-configuration errors surface here
    CHECK(cudaDeviceSynchronize()); // asynchronous execution errors surface here

    CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(byte), cudaMemcpyDeviceToHost));
    CHECK(cudaFree(d_grid_data));
    // Deliberately no cudaDeviceReset(): it would destroy the entire CUDA
    // context, invalidating any device allocations the caller still holds
    // and forcing a full re-initialization on the next call.
}

void burning_ship_grid(grid_t* grid, const grid_gen_params* params){
    /*
     * Host entry point: fills grid->data (rows*cols bytes, row-major) with
     * burning-ship escape-time counts over the region given by
     * grid->lower_left / grid->upper_right.
     */
    (void)params; // unused: the burning ship takes no extra parameters

    const size_t size = grid->size;
    const size_t rows = grid->y;
    const size_t cols = grid->x;
    const byte max_iterations = grid->max_iterations;
    thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
    thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);

    byte* d_grid_data;
    CHECK(cudaMalloc(&d_grid_data, size*sizeof(byte)));

    dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
    // ceil-divide so partial edge tiles are still covered
    dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);

    burning_ship_kernel<<<grid_size, block_size>>>(d_grid_data, max_iterations, lower_left, upper_right, rows, cols);
    CHECK(cudaGetLastError());      // launch-configuration errors surface here
    CHECK(cudaDeviceSynchronize()); // asynchronous execution errors surface here

    CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(byte), cudaMemcpyDeviceToHost));
    CHECK(cudaFree(d_grid_data));
    // Deliberately no cudaDeviceReset(): it would destroy the entire CUDA
    // context, invalidating any device allocations the caller still holds
    // and forcing a full re-initialization on the next call.
}

void multibrot_grid(grid_t* grid, const grid_gen_params* params){
    /*
     * Host entry point: fills grid->data (rows*cols bytes, row-major) with
     * multibrot (z -> z^d + z0) escape-time counts; the degree d comes
     * from params->degree.
     */
    const size_t size = grid->size;
    const size_t rows = grid->y;
    const size_t cols = grid->x;
    const byte max_iterations = grid->max_iterations;
    const double degree = params->degree;
    thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
    thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);

    byte* d_grid_data;
    CHECK(cudaMalloc(&d_grid_data, size*sizeof(byte)));

    dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
    // ceil-divide so partial edge tiles are still covered
    dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);

    multibrot_kernel<<<grid_size, block_size>>>(d_grid_data, degree, max_iterations, lower_left, upper_right, rows, cols);
    CHECK(cudaGetLastError());      // launch-configuration errors surface here
    CHECK(cudaDeviceSynchronize()); // asynchronous execution errors surface here

    CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(byte), cudaMemcpyDeviceToHost));
    CHECK(cudaFree(d_grid_data));
    // Deliberately no cudaDeviceReset(): it would destroy the entire CUDA
    // context, invalidating any device allocations the caller still holds
    // and forcing a full re-initialization on the next call.
}

void multicorn_grid(grid_t* grid, const grid_gen_params* params){
    /*
     * Host entry point: fills grid->data (rows*cols bytes, row-major) with
     * multicorn (z -> conj(z^d) + z0) escape-time counts; the degree d
     * comes from params->degree.
     */
    const size_t size = grid->size;
    const size_t rows = grid->y;
    const size_t cols = grid->x;
    const byte max_iterations = grid->max_iterations;
    const double degree = params->degree;
    thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
    thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);

    byte* d_grid_data;
    CHECK(cudaMalloc(&d_grid_data, size*sizeof(byte)));

    dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
    // ceil-divide so partial edge tiles are still covered
    dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);

    multicorn_kernel<<<grid_size, block_size>>>(d_grid_data, degree, max_iterations, lower_left, upper_right, rows, cols);
    CHECK(cudaGetLastError());      // launch-configuration errors surface here
    CHECK(cudaDeviceSynchronize()); // asynchronous execution errors surface here

    CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(byte), cudaMemcpyDeviceToHost));
    CHECK(cudaFree(d_grid_data));
    // Deliberately no cudaDeviceReset(): it would destroy the entire CUDA
    // context, invalidating any device allocations the caller still holds
    // and forcing a full re-initialization on the next call.
}

void julia_grid(grid_t* grid, const grid_gen_params* params){
    /*
     * Host entry point: fills grid->data (rows*cols bytes, row-major) with
     * Julia-set escape-time counts for the constant and escape radius
     * supplied in params->cr.
     */
    thrust::complex<CBASE> constant(params->cr.constant.re, params->cr.constant.im);
    const double radius = params->cr.radius;
    const size_t size = grid->size;
    const size_t rows = grid->y;
    const size_t cols = grid->x;
    const byte max_iterations = grid->max_iterations;
    thrust::complex<CBASE> lower_left(grid->lower_left.re, grid->lower_left.im);
    thrust::complex<CBASE> upper_right(grid->upper_right.re, grid->upper_right.im);

    byte* d_grid_data;
    CHECK(cudaMalloc(&d_grid_data, size*sizeof(byte)));

    dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y);
    // ceil-divide so partial edge tiles are still covered
    dim3 grid_size((cols + block_size.x - 1) / block_size.x, (rows + block_size.y - 1) / block_size.y);

    julia_kernel<<<grid_size, block_size>>>(d_grid_data, constant, radius, max_iterations, lower_left, upper_right, rows, cols);
    CHECK(cudaGetLastError());      // launch-configuration errors surface here
    CHECK(cudaDeviceSynchronize()); // asynchronous execution errors surface here

    CHECK(cudaMemcpy(grid->data, d_grid_data, size*sizeof(byte), cudaMemcpyDeviceToHost));
    CHECK(cudaFree(d_grid_data));
    // Deliberately no cudaDeviceReset(): it would destroy the entire CUDA
    // context, invalidating any device allocations the caller still holds
    // and forcing a full re-initialization on the next call.
}
}