main_GPU.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  #include "stdlib.h"
  #include "math.h"
  #include "time.h"
  #include <stdarg.h>
  #include <stdio.h>
  #include "SDL2/SDL.h"
  #include <CL/cl.h>
  #include "rasteriver.h"
  // OpenCL C source of the rasterizer, compiled at run time by OpenCL_init().
  //
  // raster_kernel runs once per pixel (work-item ids 0/1 are x/y) and walks
  // every triangle in `polygons`, which holds 9 floats per triangle laid out
  // as x0 y0 z0 x1 y1 z1 x2 y2 z2. For each triangle covering the pixel it
  // interpolates |z| and keeps the triangle with the LARGEST |z| seen so far.
  //
  // Output (frame_buffer[id_y * width + id_x]):
  //   - show_z_buffer != 0: a grey level made from the winning |z| clamped to
  //     0..800.
  //   - otherwise: a per-triangle colour derived from the triangle index.
  //     Pixels not covered by any triangle are NOT written, so whatever the
  //     host put in the buffer before the dispatch (RI_Tick fills it with the
  //     background pattern) stays visible.
  //
  // The source is written one string literal per line, each ending in "\n",
  // so line numbers in an OpenCL build log map back to this listing.
  const char* kernel_source =
      // Returns 1 when segment (a,b)-(c,d) properly crosses segment (p,q)-(r,s).
      // Parallel or zero-length segments (det == 0) do not count as a crossing.
      "int is_intersecting(float a, float b, float c, float d, float p, float q, float r, float s) {\n"
      "    float det, gamma, lambda;\n"
      "\n"
      "    det = (c - a) * (s - q) - (r - p) * (d - b);\n"
      "\n"
      "    if (det == 0) {\n"
      "        return 0;\n"
      "    }\n"
      "\n"
      "    lambda = ((s - q) * (r - a) + (p - r) * (s - b)) / det;\n"
      "    gamma = ((b - d) * (r - a) + (c - a) * (s - b)) / det;\n"
      "    return (0 < lambda && lambda < 1) && (0 < gamma && gamma < 1);\n"
      "}\n"
      "\n"
      // 2D vector helpers; currently not called by raster_kernel.
      "void norm(float dest[2], float a[2]){\n"
      "    float magnitude = sqrt(a[0] * a[0] + a[1] * a[1]);\n"
      "\n"
      "    dest[0] = a[0] / magnitude;\n"
      "    dest[1] = a[1] / magnitude;\n"
      "}\n"
      "\n"
      "void sub(float dest[2], float a[2], float b[2]){\n"
      "    dest[0] = a[0] - b[0];\n"
      "    dest[1] = a[1] - b[1];\n"
      "}\n"
      "\n"
      "void add(float dest[2], float a[2], float b[2]){\n"
      "    dest[0] = a[0] + b[0];\n"
      "    dest[1] = a[1] + b[1];\n"
      "}\n"
      "\n"
      "__kernel void raster_kernel(__global float* polygons, __global uint* frame_buffer, int polygon_count, int width, int height, int show_z_buffer){\n"
      "    int id_x = get_global_id(0);\n"
      "    int id_y = get_global_id(1);\n"
      "\n"
      // Ignore work-items outside the image so a padded global size cannot
      // write past the end of frame_buffer.
      "    if (id_x >= width || id_y >= height){\n"
      "        return;\n"
      "    }\n"
      "\n"
      "    float z_pixel = 0;\n"
      "    uint frame_pixel = 0;\n"
      "    int covered = 0;\n"
      "\n"
      "    for (int polygon = 0; polygon < polygon_count; polygon++){\n"
      "        int base = polygon * 9;\n"
      "        float x0 = polygons[base];\n"
      "        float y0 = polygons[base + 1];\n"
      "        float z0 = polygons[base + 2];\n"
      "        float x1 = polygons[base + 3];\n"
      "        float y1 = polygons[base + 4];\n"
      "        float z1 = polygons[base + 5];\n"
      "        float x2 = polygons[base + 6];\n"
      "        float y2 = polygons[base + 7];\n"
      "        float z2 = polygons[base + 8];\n"
      "\n"
      // Triangle bounding box, clamped to the screen. The lower bound is
      // raised to 0 and the upper bound lowered to the image size; the
      // reverse would stretch the box over the whole screen and cull nothing.
      "        float smallest_x = fmax(fmin(x0, fmin(x1, x2)), 0.0f);\n"
      "        float largest_x = fmin(fmax(x0, fmax(x1, x2)), (float)width);\n"
      "        float smallest_y = fmax(fmin(y0, fmin(y1, y2)), 0.0f);\n"
      "        float largest_y = fmin(fmax(y0, fmax(y1, y2)), (float)height);\n"
      "\n"
      "        if (!(id_x >= smallest_x && id_x <= largest_x && id_y >= smallest_y && id_y <= largest_y)){\n"
      "            continue;\n"
      "        }\n"
      "\n"
      // Point-in-triangle by crossing parity: cast a segment from the pixel
      // to a far-away point and count how many edges it crosses.
      "        int intersections = 0;\n"
      "\n"
      "        intersections += is_intersecting(id_x, id_y, 10000, 100000, x0, y0, x1, y1);\n"
      "        intersections += is_intersecting(id_x, id_y, 10000, 100000, x1, y1, x2, y2);\n"
      "        intersections += is_intersecting(id_x, id_y, 10000, 100000, x2, y2, x0, y0);\n"
      "\n"
      "        if (intersections % 2 == 0){\n"
      "            continue;\n"
      "        }\n"
      "\n"
      // Barycentric weights relative to vertex 2. A zero denominator means a
      // degenerate (zero-area) triangle, which has no interior to shade.
      "        float denominator = (y1 - y2) * (x0 - x2) + (x2 - x1) * (y0 - y2);\n"
      "\n"
      "        if (denominator == 0){\n"
      "            continue;\n"
      "        }\n"
      "\n"
      "        float w0 = ((y1 - y2) * (id_x - x2) + (x2 - x1) * (id_y - y2)) / denominator;\n"
      "        float w1 = ((y2 - y0) * (id_x - x2) + (x0 - x2) * (id_y - y2)) / denominator;\n"
      "        float w2 = 1.0f - w0 - w1;\n"
      "\n"
      // Only the magnitude of the interpolated depth is used.
      "        float z = fabs(w0 * z0 + w1 * z1 + w2 * z2);\n"
      "\n"
      "        if (!(z > z_pixel)){\n"
      "            continue;\n"
      "        }\n"
      "\n"
      "        z_pixel = z;\n"
      "        frame_pixel = 0xFFFFFFFF / polygon_count * (polygon + 1);\n"
      "        covered = 1;\n"
      "    }\n"
      "\n"
      "    if (show_z_buffer){\n"
      "        float z = clamp(z_pixel, 0.0f, 800.0f);\n"
      "        float norm_z = z / 800.0f;\n"
      "        uchar intensity = (uchar)(norm_z * 255.0f);\n"
      "\n"
      "        frame_buffer[id_y * width + id_x] = 0xFF000000 | (intensity << 16) | (intensity << 8) | intensity;\n"
      "        return;\n"
      "    }\n"
      "\n"
      // Uncovered pixels keep the value the host wrote before the dispatch.
      "    if (covered){\n"
      "        frame_buffer[id_y * width + id_x] = frame_pixel;\n"
      "    }\n"
      "}\n";
  143. typedef int RI_result;
  144. typedef cl_uint RI_uint;
  145. const RI_result RI_ERROR = -1;
  146. const RI_result RI_SUCCESS = 0;
  147. const RI_result RI_NOT_RUNNING = -2;
  148. const RI_result RI_RUNNING = 1;
  149. RI_result erchk_func(cl_int error, int line, char *file){
  150. if (error != CL_SUCCESS){
  151. printf("OpenCL Error: %d at line %d at file %s\n", error, line, file);
  152. return RI_ERROR;
  153. }
  154. return RI_SUCCESS;
  155. }
  156. #define erchk(error) erchk_func(error, __LINE__, __FILE__)
  // ----- RasterIver state
  int width = 0;               // output width in pixels, set by RI_Init
  int height = 0;              // output height in pixels, set by RI_Init
  int show_z_buffer = 0;       // non-zero selects the depth view instead of colours
  int polygon_count = 20000;   // number of triangles in `polygons`
  float *polygons = NULL;      // 9 floats per triangle; allocated by RI_RequestPolygons
  int running = 1;             // cleared on SDL_QUIT and by RI_Stop
  int frame = 0;               // number of frames presented so far
  int show_debug = 0;          // non-zero enables debug() output
  // -----
  167. // ----- Rendering Vars
  168. SDL_Window* window;
  169. SDL_Renderer* renderer;
  170. SDL_Texture* texture;
  171. RI_uint *frame_buffer;
  172. float *z_buffer;
  173. // -----
  174. // ----- OpenCL Vars
  175. cl_platform_id platform;
  176. cl_device_id device;
  177. RI_uint number_of_platforms, number_of_devices;
  178. cl_int error;
  179. cl_context context;
  180. cl_command_queue queue;
  181. cl_mem input_memory_buffer;
  182. cl_mem output_memory_buffer;
  183. cl_program kernel_program;
  184. cl_kernel compiled_kernel;
  185. size_t size_2d[2];
  186. RI_uint pattern;
  187. // -----
  188. RI_result debug(char *string, ...){
  189. if (!show_debug){
  190. return RI_ERROR;
  191. }
  192. va_list args;
  193. va_start(args, string);
  194. vprintf(strcat("[RasterIver] ", string), args);
  195. va_end(args);
  196. return RI_SUCCESS;
  197. }
  198. RI_result RI_SetDebugFlag(int RI_ShowDebugFlag){
  199. show_debug = RI_ShowDebugFlag;
  200. return RI_SUCCESS;
  201. }
  202. RI_result Rendering_init(char *title) {
  203. debug("Initializing Rendering...");
  204. frame_buffer = malloc(sizeof(RI_uint) * width * height);
  205. z_buffer = malloc(sizeof(float) * width * height);
  206. if (SDL_Init(SDL_INIT_VIDEO) < 0) {
  207. debug("SDL_Init failed");
  208. return RI_ERROR;
  209. }
  210. if (width <= 0 || height <= 0) {
  211. debug("Invalid width or height");
  212. return RI_ERROR;
  213. }
  214. window = SDL_CreateWindow(title, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, width, height, SDL_WINDOW_OPENGL);
  215. if (!window) {
  216. debug("SDL_CreateWindow failed");
  217. return RI_ERROR;
  218. }
  219. renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
  220. if (!renderer) {
  221. debug("SDL_CreateRenderer failed");
  222. return RI_ERROR;
  223. }
  224. texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, width, height);
  225. if (!texture) {
  226. debug("SDL_CreateTexture failed");
  227. return RI_ERROR;
  228. }
  229. debug("Initialized Rendering");
  230. return RI_SUCCESS;
  231. }
  232. RI_result RI_ShowZBuffer(int RI_ShowZBufferFlag){
  233. show_z_buffer = RI_ShowZBufferFlag;
  234. return RI_SUCCESS;
  235. }
  236. RI_result RI_SetBackground(RI_uint RI_BackgroundColor){
  237. pattern = RI_BackgroundColor;
  238. return RI_SUCCESS;
  239. }
  240. RI_result OpenCL_init(){
  241. debug("Initializing OpenCL...");
  242. clGetPlatformIDs(1, &platform, &number_of_platforms);
  243. if(number_of_platforms == 0){
  244. printf("No OpenCL Platforms\n");
  245. return RI_ERROR;
  246. }
  247. clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &number_of_devices);
  248. if (number_of_devices == 0){
  249. printf("No Valid GPU's Found\n");
  250. return RI_ERROR;
  251. }
  252. context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
  253. erchk(error);
  254. queue = clCreateCommandQueue(context, device, 0, &error);
  255. erchk(error);
  256. output_memory_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(RI_uint) * width * height, NULL, &error);
  257. erchk(error);
  258. kernel_program = clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error);
  259. erchk(error);
  260. error = clBuildProgram(kernel_program, 1, &device, NULL, NULL, NULL);
  261. erchk(error);
  262. compiled_kernel = clCreateKernel(kernel_program, "raster_kernel", &error);
  263. erchk(error);
  264. erchk(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer));
  265. erchk(clSetKernelArg(compiled_kernel, 1, sizeof(cl_mem), &output_memory_buffer));
  266. erchk(clSetKernelArg(compiled_kernel, 2, sizeof(int), &polygon_count));
  267. erchk(clSetKernelArg(compiled_kernel, 3, sizeof(int), &width));
  268. erchk(clSetKernelArg(compiled_kernel, 4, sizeof(int), &height));
  269. erchk(clSetKernelArg(compiled_kernel, 5, sizeof(int), &show_z_buffer));
  270. size_2d[0] = width;
  271. size_2d[1] = height;
  272. pattern = 0x22222222;
  273. debug("Initialized OpenCL");
  274. return RI_SUCCESS;
  275. }
  276. RI_result RI_Stop(){
  277. debug("Stopping...");
  278. running = 0;
  279. clReleaseMemObject(input_memory_buffer);
  280. clReleaseMemObject(output_memory_buffer);
  281. clReleaseKernel(compiled_kernel);
  282. clReleaseProgram(kernel_program);
  283. clReleaseCommandQueue(queue);
  284. clReleaseContext(context);
  285. SDL_DestroyTexture(texture);
  286. SDL_DestroyRenderer(renderer);
  287. SDL_DestroyWindow(window);
  288. SDL_Quit();
  289. if (polygons != NULL)
  290. free(polygons);
  291. else
  292. debug("Polygons Was Unset on Stop");
  293. if (frame_buffer != NULL)
  294. free(frame_buffer);
  295. else
  296. debug("Frame-Buffer Was Unset on Stop");
  297. if (z_buffer != NULL)
  298. free(z_buffer);
  299. else
  300. debug("Z-Buffer Was Unset on Stop");
  301. debug("Stopped");
  302. return RI_SUCCESS;
  303. }
  304. RI_result RI_RequestPolygons(int RI_PolygonsToRequest){
  305. polygon_count = RI_PolygonsToRequest;
  306. debug("Requesting %d Polygons...\n", polygon_count);
  307. if (polygons != NULL){
  308. free(polygons);
  309. }
  310. polygons = malloc(sizeof(float) * 3 * 3 * polygon_count);
  311. if (polygons == NULL){
  312. debug("Malloc Error");
  313. return RI_ERROR;
  314. }
  315. for (int p = 0; p < polygon_count; p++){
  316. for (int point = 0; point < 3; point++){
  317. for (int i = 0; i < 3; i++){
  318. polygons[i] = rand();
  319. }
  320. }
  321. }
  322. input_memory_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * 3 * 3 * polygon_count, polygons, &error);
  323. if (input_memory_buffer == NULL) {
  324. debug("OpenCL buffer creation failed for polygons.\n");
  325. }
  326. debug("Request for %d Polygons Granted\n", polygon_count);
  327. return erchk(error);
  328. }
  329. RI_result RI_Tick(){
  330. debug("Ticking...");
  331. if (running) {
  332. if (polygons == NULL){
  333. debug("Polygons is Unset");
  334. return RI_ERROR;
  335. }
  336. if (frame_buffer == NULL){
  337. debug("Frame-Buffer is Unset");
  338. return RI_ERROR;
  339. }
  340. if (z_buffer == NULL){
  341. debug("Z-Buffer is Unset");
  342. return RI_ERROR;
  343. }
  344. if (frame % 1 == 0){
  345. for (int p = 0; p < polygon_count; p++){
  346. for (int point = 0; point < 3; point++){
  347. for (int i = 0; i < 3; i++){
  348. polygons[i] = rand() % width + 1;
  349. }
  350. }
  351. }
  352. }
  353. //memset(&z_buffer, 0, sizeof(float) * width * height);
  354. erchk(clEnqueueWriteBuffer(queue, input_memory_buffer, CL_TRUE, 0, sizeof(float) * 3 * 3 * polygon_count, polygons, 0, NULL, NULL));
  355. erchk(clEnqueueFillBuffer(queue, output_memory_buffer, &pattern, sizeof(RI_uint), 0, sizeof(RI_uint) * width * height, 0, NULL, NULL));
  356. erchk(clEnqueueNDRangeKernel(queue, compiled_kernel, 2, NULL, size_2d, NULL, 0, NULL, NULL));
  357. erchk(clFinish(queue));
  358. erchk(clEnqueueReadBuffer(queue, output_memory_buffer, CL_TRUE, 0, sizeof(RI_uint) * width * height, &frame_buffer, 0, NULL, NULL));
  359. SDL_Event event;
  360. while (SDL_PollEvent(&event)){
  361. switch (event.type){
  362. case SDL_QUIT:
  363. running = 0;
  364. }
  365. }
  366. SDL_UpdateTexture(texture, NULL, frame_buffer, width * sizeof(RI_uint));
  367. SDL_RenderClear(renderer);
  368. SDL_RenderCopy(renderer, texture, NULL, NULL);
  369. SDL_RenderPresent(renderer);
  370. frame++;
  371. debug("Ticked");
  372. return RI_SUCCESS;
  373. }
  374. else {
  375. return RI_ERROR;
  376. }
  377. }
  378. RI_result RI_IsRunning(){
  379. if (running){
  380. return RI_RUNNING;
  381. }
  382. else {
  383. return RI_NOT_RUNNING;
  384. }
  385. }
  386. RI_result RI_Init(int RI_WindowWidth, int RI_WindowHeight, char *RI_WindowTitle){
  387. srand(time(NULL));
  388. width = RI_WindowWidth;
  389. height = RI_WindowHeight;
  390. if(OpenCL_init() == RI_ERROR){
  391. return RI_ERROR;
  392. }
  393. Rendering_init(RI_WindowTitle);
  394. return RI_SUCCESS;
  395. }