/* main_GPU_fixed_polygons.c: rasterizes random triangles with an OpenCL kernel and displays the result through SDL2. */

#include "stdlib.h"
#include "math.h"
#include "time.h"
#include <stdio.h>

#include "SDL2/SDL.h"
#include <CL/cl.h>
  6. const char* kernel_source = " \
  7. int is_intersecting(float a, float b, float c, float d, float p, float q, float r, float s) { \
  8. float det, gamma, lambda; \
  9. \
  10. det = (c - a) * (s - q) - (r - p) * (d - b); \
  11. \
  12. if (det == 0) { \
  13. return 1; \
  14. } \
  15. else { \
  16. lambda = ((s - q) * (r - a) + (p - r) * (s - b)) / det; \
  17. gamma = ((b - d) * (r - a) + (c - a) * (s - b)) / det; \
  18. return (0 < lambda && lambda < 1) && (0 < gamma && gamma < 1); \
  19. } \
  20. } \
  21. \
  22. void norm(float dest[2], float a[2]){ \
  23. float magnitude = sqrt(a[0] * a[0] + a[1] * a[1]); \
  24. \
  25. dest[0] = a[0] / magnitude; \
  26. dest[1] = a[1] / magnitude; \
  27. } \
  28. \
  29. void sub(float dest[2], float a[2], float b[2]){ \
  30. dest[0] = a[0] - b[0]; \
  31. dest[1] = a[1] - b[1]; \
  32. } \
  33. \
  34. void add(float dest[2], float a[2], float b[2]){ \
  35. dest[0] = a[0] + b[0]; \
  36. dest[1] = a[1] + b[1]; \
  37. } \
  38. \
  39. __kernel void raster_kernel(__global float* polygons, __global uint* frame_buffer, int POLYGONS, int WIDTH, int HEIGHT, int SHOW_Z_BUFFER){ \
  40. int id_x = get_global_id(0); \
  41. int id_y = get_global_id(1); \
  42. \
  43. float z_pixel = 0; \
  44. uint frame_pixel = 0x22222222; \
  45. \
  46. for (int polygon = 0; polygon < POLYGONS; polygon++){ \
  47. int base = polygon * 9; \
  48. float x0 = polygons[base]; \
  49. float y0 = polygons[base + 1]; \
  50. float z0 = polygons[base + 2]; \
  51. float x1 = polygons[base + 3]; \
  52. float y1 = polygons[base + 4]; \
  53. float z1 = polygons[base + 5]; \
  54. float x2 = polygons[base + 6]; \
  55. float y2 = polygons[base + 7]; \
  56. float z2 = polygons[base + 8]; \
  57. \
  58. float smallest_x = x0; \
  59. float largest_x = x0; \
  60. float smallest_y = y0; \
  61. float largest_y = y0; \
  62. \
  63. for (int point = 0; point < 3; point++){ \
  64. float x = polygons[base + point * 3]; \
  65. float y = polygons[base + point * 3 + 1]; \
  66. \
  67. if (x > largest_x){ \
  68. largest_x = x; \
  69. } \
  70. \
  71. if (x < smallest_x){ \
  72. smallest_x = x; \
  73. } \
  74. \
  75. if (y > largest_y){ \
  76. largest_y = y; \
  77. } \
  78. \
  79. if (y < smallest_y){\
  80. smallest_y = y;\
  81. } \
  82. } \
  83. \
  84. smallest_x = fmin(smallest_x, 0); \
  85. largest_x = fmax(largest_x, WIDTH); \
  86. smallest_y = fmin(smallest_y, 0); \
  87. largest_y = fmax(largest_y, HEIGHT); \
  88. \
  89. if (id_x >= smallest_x && id_x <= largest_x && id_y >= smallest_y && id_y <= largest_y){ \
  90. int intersections = 0; \
  91. \
  92. intersections += is_intersecting(id_x, id_y, 10000, 100000, x0, y0, x1, y1); \
  93. intersections += is_intersecting(id_x, id_y, 10000, 100000, x1, y1, x2, y2); \
  94. intersections += is_intersecting(id_x, id_y, 10000, 100000, x2, y2, x0, y0); \
  95. \
  96. if (intersections % 2 == 0){ \
  97. continue; \
  98. } \
  99. \
  100. float denominator = (y1 - y2) * (x0 - x2) + (x2 - x1) * (y0 - y2); \
  101. float w0 = ((y1 - y2) * (id_x - x2) + (x2 - x1) * (id_y - y2)) / denominator; \
  102. float w1 = ((y2 - y0) * (id_x - x0) + (x0 - x2) * (id_y - y2)) / denominator; \
  103. float w2 = 1.0 - w0 - w1; \
  104. \
  105. if (denominator < 0) { \
  106. w0 = -w0; \
  107. w1 = -w1; \
  108. w2 = -w2; \
  109. denominator = -denominator; \
  110. } \
  111. \
  112. float z = w0 * z0 + w1 * z1 + w2 * z2; \
  113. \
  114. if (z < 0){ \
  115. z *= -1; \
  116. } \
  117. \
  118. if (z > z_pixel){ \
  119. z_pixel = z; \
  120. } \
  121. else { \
  122. continue; \
  123. } \
  124. \
  125. frame_pixel = 0xFFFFFFFF / POLYGONS * (polygon + 1); \
  126. } \
  127. } \
  128. \
  129. frame_buffer[id_y * WIDTH + id_x] = frame_pixel; \
  130. \
  131. if (!SHOW_Z_BUFFER){return;}\
  132. \
  133. float z = clamp(z_pixel, 0.0f, 800.0f);\
  134. \
  135. float norm_z = z / 800.0f;\
  136. \
  137. uchar intensity = (uchar)(norm_z * 255.0f);\
  138. \
  139. frame_buffer[id_y * WIDTH + id_x] = 0xFF000000 | (intensity << 16) | (intensity << 8) | intensity;\
  140. }\n";
  141. void erchk_func(cl_int error, int line, char *file){
  142. if (error != CL_SUCCESS){
  143. printf("ERROR :O %d, line %d at file %s\n", error, line, file);
  144. exit(1);
  145. }
  146. }
  147. #define erchk(error) erchk_func(error, __LINE__, __FILE__)
/* Frame size in pixels: used for the SDL window and texture, the output
 * buffer size and the 2D global work size of the kernel. */
const int WIDTH = 800;
const int HEIGHT = 800;
/* Number of triangles generated on the host and rasterized each frame. */
const int POLYGONS = 20000;
/* Passed to the kernel; non-zero makes it write a grayscale depth value
 * instead of the per-polygon colour. */
const int SHOW_Z_BUFFER = 0;
  152. int main(){
  153. srand(time(NULL));
  154. float polygons[POLYGONS][3][3];
  155. cl_uint frame_buffer[WIDTH * HEIGHT];
  156. float z_buffer[WIDTH * HEIGHT];
  157. // ----- Check for Valid Platforms & GPUs
  158. cl_platform_id platform;
  159. cl_device_id device;
  160. cl_uint number_of_platforms, number_of_devices;
  161. clGetPlatformIDs(1, &platform, &number_of_platforms);
  162. if(number_of_platforms == 0){
  163. printf("No OpenCL Platforms");
  164. }
  165. clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &number_of_devices);
  166. if (number_of_devices == 0){
  167. printf("No GPU's Found");
  168. }
  169. // -----
  170. // ----- Setup OpenCL
  171. cl_int error;
  172. cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
  173. cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
  174. cl_mem input_memory_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * 3 * 3 * POLYGONS, polygons, &error);
  175. erchk(error);
  176. cl_mem output_memory_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uint) * WIDTH * HEIGHT, NULL, &error);
  177. erchk(error);
  178. printf("%ld bytes\n", sizeof(float) * 3 * 3 * POLYGONS + WIDTH * HEIGHT * sizeof(cl_uint));
  179. cl_program kernel_program = clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error);
  180. erchk(error);
  181. error = clBuildProgram(kernel_program, 1, &device, NULL, NULL, NULL);
  182. erchk(error);
  183. cl_kernel compiled_kernel = clCreateKernel(kernel_program, "raster_kernel", &error);
  184. erchk(error);
  185. erchk(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer));
  186. erchk(clSetKernelArg(compiled_kernel, 1, sizeof(cl_mem), &output_memory_buffer));
  187. erchk(clSetKernelArg(compiled_kernel, 2, sizeof(int), &POLYGONS));
  188. erchk(clSetKernelArg(compiled_kernel, 3, sizeof(int), &WIDTH));
  189. erchk(clSetKernelArg(compiled_kernel, 4, sizeof(int), &HEIGHT));
  190. erchk(clSetKernelArg(compiled_kernel, 5, sizeof(int), &SHOW_Z_BUFFER));
  191. size_t size_2d[2] = {WIDTH, HEIGHT};
  192. cl_uint pattern = 0x22222222;
  193. // -----
  194. SDL_Init(SDL_INIT_VIDEO);
  195. SDL_Window* window = SDL_CreateWindow("Rasterizer", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, WIDTH, HEIGHT, SDL_WINDOW_OPENGL);
  196. SDL_Renderer* renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
  197. SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIDTH, HEIGHT);
  198. int running = 1;
  199. int frame = 0;
  200. Uint64 start_time;
  201. double frame_time_ms;
  202. double fps;
  203. while (running) {
  204. start_time = SDL_GetPerformanceCounter();
  205. if (frame % 1 == 0){
  206. for (int p = 0; p < POLYGONS; p++){
  207. for (int point = 0; point < 3; point++){
  208. for (int i = 0; i < 3; i++){
  209. polygons[p][point][i] = rand() % WIDTH + 1;
  210. }
  211. }
  212. }
  213. }
  214. memset(&z_buffer, 0, sizeof(float) * WIDTH * HEIGHT);
  215. erchk(clEnqueueWriteBuffer(queue, input_memory_buffer, CL_TRUE, 0, sizeof(float) * 3 * 3 * POLYGONS, polygons, 0, NULL, NULL));
  216. erchk(clEnqueueFillBuffer(queue, output_memory_buffer, &pattern, sizeof(cl_uint), 0, sizeof(cl_uint) * WIDTH * HEIGHT, 0, NULL, NULL));
  217. erchk(clEnqueueNDRangeKernel(queue, compiled_kernel, 2, NULL, size_2d, NULL, 0, NULL, NULL));
  218. erchk(clFinish(queue));
  219. erchk(clEnqueueReadBuffer(queue, output_memory_buffer, CL_TRUE, 0, sizeof(cl_uint) * WIDTH * HEIGHT, &frame_buffer, 0, NULL, NULL));
  220. SDL_Event event;
  221. while (SDL_PollEvent(&event)){
  222. switch (event.type){
  223. case SDL_QUIT:
  224. running = 0;
  225. }
  226. }
  227. SDL_UpdateTexture(texture, NULL, frame_buffer, WIDTH * sizeof(cl_uint));
  228. SDL_RenderClear(renderer);
  229. SDL_RenderCopy(renderer, texture, NULL, NULL);
  230. SDL_RenderPresent(renderer);
  231. frame++;
  232. double delta_time = (double)(SDL_GetPerformanceCounter() - start_time) / (double)SDL_GetPerformanceFrequency();
  233. double fps = 1.0 / delta_time;
  234. printf("%lf fps\n", fps);
  235. printf("%d polygons\n", POLYGONS);
  236. }
  237. clReleaseMemObject(input_memory_buffer);
  238. clReleaseMemObject(output_memory_buffer);
  239. clReleaseKernel(compiled_kernel);
  240. clReleaseProgram(kernel_program);
  241. clReleaseCommandQueue(queue);
  242. clReleaseContext(context);
  243. SDL_DestroyTexture(texture);
  244. SDL_DestroyRenderer(renderer);
  245. SDL_DestroyWindow(window);
  246. SDL_Quit();
  247. }