kernels.cl 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869
  1. #pragma OPENCL EXTENSION cl_khr_fp64 : enable
  2. /* +++ BENEDICTIO CODICIS +++
  3. May the segfaults sleep,
  4. May the pointers align,
  5. May the UB remain uninvoked,
  6. And may this program return 0 in peace.
  7. -PreistGPT
  8. */
  9. typedef struct {
  10. double w;
  11. double x;
  12. double y;
  13. double z;
  14. } RI_vector_4;
  15. typedef struct {
  16. double x;
  17. double y;
  18. double z;
  19. } RI_vector_3;
  20. typedef struct {
  21. double x;
  22. double y;
  23. } RI_vector_2;
  24. typedef struct {
  25. ushort width;
  26. ushort height; // actual height of the image INCLUDING all frames
  27. uint index;
  28. ushort frame_count;
  29. ushort current_frame;
  30. ushort frame_height; // height of each frame
  31. } RI_texture;
  32. typedef struct {
  33. RI_vector_3 position_0, position_1, position_2;
  34. RI_vector_3 normal_0, normal_1, normal_2;
  35. RI_vector_2 uv_0, uv_1, uv_2;
  36. short min_screen_x, max_screen_x, min_screen_y, max_screen_y;
  37. uchar should_render;
  38. uchar is_split;
  39. uchar is_transformed;
  40. uchar is_shrunk;
  41. RI_texture texture;
  42. } RI_renderable_face;
  43. typedef struct {
  44. RI_vector_3 position_0;
  45. RI_vector_3 position_1;
  46. RI_vector_3 position_2;
  47. RI_vector_3 normal_0;
  48. RI_vector_3 normal_1;
  49. RI_vector_3 normal_2;
  50. RI_vector_2 uv_0;
  51. RI_vector_2 uv_1;
  52. RI_vector_2 uv_2;
  53. uchar should_render;
  54. } RI_face;
  55. typedef struct {
  56. RI_vector_3 position;
  57. RI_vector_4 rotation;
  58. RI_vector_3 scale;;
  59. uchar active;
  60. uchar has_normals;
  61. uchar has_uvs;
  62. ushort material_index;
  63. uint face_index;
  64. uint face_count;
  65. } RI_actor;
  66. typedef struct {
  67. RI_vector_3 position;
  68. RI_vector_4 rotation;
  69. float FOV, min_clip, max_clip;
  70. } RI_camera;
  71. // value-wise multiplacation.
  72. // multiply the whole vector by 1 value
  73. void vector_2_times(RI_vector_2 *vector, double value){
  74. vector->x *= value;
  75. vector->y *= value;
  76. }
  77. void global_vector_2_times(__global RI_vector_2 *vector, double value){
  78. vector->x *= value;
  79. vector->y *= value;
  80. }
  81. // set all values of a global vector 3
  82. void global_vector_3_memset(__global RI_vector_3 *vector, double value){
  83. vector->x = value;
  84. vector->y = value;
  85. vector->z = value;
  86. }
  87. // set all values of a global vector 2
  88. void global_vector_2_memset(__global RI_vector_2 *vector, double value){
  89. vector->x = value;
  90. vector->y = value;
  91. }
  92. // set all values of a vector 3
  93. void vector_3_memset(RI_vector_3 *vector, double value){
  94. vector->x = value;
  95. vector->y = value;
  96. vector->z = value;
  97. }
  98. // set all values of a vector 2
  99. void vector_2_memset(RI_vector_2 *vector, double value){
  100. vector->x = value;
  101. vector->y = value;
  102. }
  103. // value-wise multiplacation.
  104. // multiply the whole vector by 1 value
  105. void vector_3_times(RI_vector_3 *vector, double value){
  106. vector->x *= value;
  107. vector->y *= value;
  108. vector->z *= value;
  109. }
  110. void global_vector_3_times(__global RI_vector_3 *vector, double value){
  111. vector->x *= value;
  112. vector->y *= value;
  113. vector->z *= value;
  114. }
  115. // hadamard multiplacation.
  116. // multiply each value of one vector with the matching one on the other vector
  117. void vector_3_hadamard(RI_vector_3 *multiplicand, RI_vector_3 multiplicator){
  118. multiplicand->x *= multiplicator.x;
  119. multiplicand->y *= multiplicator.y;
  120. multiplicand->z *= multiplicator.z;
  121. }
  122. void global_vector_3_hadamard(__global RI_vector_3 *multiplicand, RI_vector_3 multiplicator){
  123. multiplicand->x *= multiplicator.x;
  124. multiplicand->y *= multiplicator.y;
  125. multiplicand->z *= multiplicator.z;
  126. }
  127. // "hadamard" addition.
  128. // add each value of one vector with the matching one on the other vector
  129. void vector_2_element_wise_add(RI_vector_2 *addend_a, RI_vector_2 addend_b){
  130. addend_a->x += addend_b.x;
  131. addend_a->y += addend_b.y;
  132. }
  133. void global_vector_2_element_wise_add(__global RI_vector_2 *addend_a, RI_vector_2 addend_b){
  134. addend_a->x += addend_b.x;
  135. addend_a->y += addend_b.y;
  136. }
  137. // "hadamard" addition.
  138. // add each value of one vector with the matching one on the other vector
  139. void vector_3_element_wise_add(RI_vector_3 *addend_a, RI_vector_3 addend_b){
  140. addend_a->x += addend_b.x;
  141. addend_a->y += addend_b.y;
  142. addend_a->z += addend_b.z;
  143. }
  144. void global_vector_3_element_wise_add(__global RI_vector_3 *addend_a, RI_vector_3 addend_b){
  145. addend_a->x += addend_b.x;
  146. addend_a->y += addend_b.y;
  147. addend_a->z += addend_b.z;
  148. }
  149. // "hadamard" subtraction.
  150. // subtraction each value of one vector with the matching one on the other vector
  151. void vector_3_element_wise_subtract(RI_vector_3 *minuend, RI_vector_3 subtrahend){
  152. minuend->x -= subtrahend.x;
  153. minuend->y -= subtrahend.y;
  154. minuend->z -= subtrahend.z;
  155. }
  156. void global_vector_3_element_wise_subtract(__global RI_vector_3 *minuend, RI_vector_3 subtrahend){
  157. minuend->x -= subtrahend.x;
  158. minuend->y -= subtrahend.y;
  159. minuend->z -= subtrahend.z;
  160. }
  161. // conjugate a quaterion.
  162. // (flip the sign of the x, y, z values)
  163. void quaternion_conjugate(RI_vector_4* quaternion){
  164. quaternion->x *= -1.0;
  165. quaternion->y *= -1.0;
  166. quaternion->z *= -1.0;
  167. }
  168. void globla_quaternion_conjugate(__global RI_vector_4* quaternion){
  169. quaternion->x *= -1.0;
  170. quaternion->y *= -1.0;
  171. quaternion->z *= -1.0;
  172. }
  173. // quaternion multiplacation
  174. void quaternion_multiply(RI_vector_4* a, RI_vector_4 b){
  175. double w1 = a->w; double x1 = a->x; double y1 = a->y; double z1 = a->z;
  176. double w2 = b.w; double x2 = b.x; double y2 = b.y; double z2 = b.z;
  177. double w = w1*w2 - x1*x2 - y1*y2 - z1*z2;
  178. double x = w1*x2 + x1*w2 + y1*z2 - z1*y2;
  179. double y = w1*y2 - x1*z2 + y1*w2 + z1*x2;
  180. double z = w1*z2 + x1*y2 - y1*x2 + z1*w2;
  181. *a = (RI_vector_4){w, x, y, z};
  182. }
  183. void global_quaternion_multiply(__global RI_vector_4* a, RI_vector_4 b){
  184. double w1 = a->w; double x1 = a->x; double y1 = a->y; double z1 = a->z;
  185. double w2 = b.w; double x2 = b.x; double y2 = b.y; double z2 = b.z;
  186. double w = w1*w2 - x1*x2 - y1*y2 - z1*z2;
  187. double x = w1*x2 + x1*w2 + y1*z2 - z1*y2;
  188. double y = w1*y2 - x1*z2 + y1*w2 + z1*x2;
  189. double z = w1*z2 + x1*y2 - y1*x2 + z1*w2;
  190. *a = (RI_vector_4){w, x, y, z};
  191. }
  192. // linear interpolate between 2 vectors
  193. void vector_2_lerp(RI_vector_2 vector_a, RI_vector_2 vector_b, RI_vector_2 *result, double w1){
  194. double w0 = 1.0 - w1;
  195. vector_2_memset(result, 0);
  196. vector_2_times(&vector_a, w0);
  197. vector_2_times(&vector_b, w1);
  198. vector_2_element_wise_add(result, vector_a);
  199. vector_2_element_wise_add(result, vector_b);
  200. }
  201. void global_vector_2_lerp(RI_vector_2 vector_a, RI_vector_2 vector_b, __global RI_vector_2 *result, double w1){
  202. double w0 = 1.0 - w1;
  203. global_vector_2_memset(result, 0);
  204. vector_2_times(&vector_a, w0);
  205. vector_2_times(&vector_b, w1);
  206. global_vector_2_element_wise_add(result, vector_a);
  207. global_vector_2_element_wise_add(result, vector_b);
  208. }
  209. // linear interpolate between 2 vectors
  210. void vector_3_lerp(RI_vector_3 vector_a, RI_vector_3 vector_b, RI_vector_3 *result, double w1){
  211. double w0 = 1.0 - w1;
  212. vector_3_memset(result, 0);
  213. vector_3_times(&vector_a, w0);
  214. vector_3_times(&vector_b, w1);
  215. vector_3_element_wise_add(result, vector_a);
  216. vector_3_element_wise_add(result, vector_b);
  217. }
  218. void global_vector_3_lerp(RI_vector_3 vector_a, RI_vector_3 vector_b, __global RI_vector_3 *result, double w1){
  219. double w0 = 1.0 - w1;
  220. global_vector_3_memset(result, 0);
  221. vector_3_times(&vector_a, w0);
  222. vector_3_times(&vector_b, w1);
  223. global_vector_3_element_wise_add(result, vector_a);
  224. global_vector_3_element_wise_add(result, vector_b);
  225. }
  226. void quaternion_rotate(RI_vector_3 *position, RI_vector_4 rotation){
  227. RI_vector_4 pos_quat = {0, position->x, position->y, position->z};
  228. RI_vector_4 rotation_conjugation = rotation;
  229. quaternion_conjugate(&rotation_conjugation);
  230. quaternion_multiply(&rotation, pos_quat);
  231. quaternion_multiply(&rotation, rotation_conjugation);
  232. *position = (RI_vector_3){rotation.x, rotation.y, rotation.z};
  233. }
  234. void global_quaternion_rotate(__global RI_vector_3 *position, RI_vector_4 rotation){
  235. RI_vector_4 pos_quat = {0, position->x, position->y, position->z};
  236. RI_vector_4 rotation_conjugation = rotation;
  237. quaternion_conjugate(&rotation_conjugation);
  238. quaternion_multiply(&rotation, pos_quat);
  239. quaternion_multiply(&rotation, rotation_conjugation);
  240. *position = (RI_vector_3){rotation.x, rotation.y, rotation.z};
  241. }
  242. __kernel void clear_tile_array(__global uint* tiles){
  243. uint num_faces_per_tile = tiles[2];
  244. uint number_of_horizontal_tiles = tiles[3];
  245. uint number_of_vertical_tiles = tiles[4];
  246. int x = get_global_id(0); if (x >= number_of_horizontal_tiles) return;
  247. int y = get_global_id(1); if (y >= number_of_vertical_tiles) return;
  248. uint index = 5 + (y * number_of_horizontal_tiles + x) * (1 + num_faces_per_tile);
  249. tiles[index] = 0;
  250. return;
  251. }
  252. __kernel void transformer(__global RI_face *faces, __global RI_renderable_face *renderable_faces, double actor_x, double actor_y, double actor_z, double actor_r_w, double actor_r_x, double actor_r_y, double actor_r_z, double actor_s_x, double actor_s_y, double actor_s_z, int has_normals, int has_uvs, int face_array_offset_index, int face_count, int width, int height, double horizontal_fov_factor, double vertical_fov_factor, float min_clip, float max_clip, double camera_x, double camera_y, double camera_z, double camera_r_w, double camera_r_x, double camera_r_y, double camera_r_z, int renderable_face_offset, int face_sqrt, ushort texture_width, ushort texture_height, uint texture_index, __global uint* tiles, ushort frame_count, ushort frame_height, ushort current_frame){
  253. int face_index = get_global_id(1) * face_sqrt + get_global_id(0); if (face_index >= face_count) return;
  254. RI_vector_3 current_actor_position = (RI_vector_3){actor_x, actor_y, actor_z};
  255. RI_vector_4 current_actor_rotation = (RI_vector_4){actor_r_w, actor_r_x, actor_r_y, actor_r_z};
  256. RI_vector_3 current_actor_scale = (RI_vector_3){actor_s_x, actor_s_y, actor_s_z};
  257. RI_vector_3 camera_position = (RI_vector_3){camera_x, camera_y, camera_z};
  258. RI_vector_4 camera_rotation = (RI_vector_4){camera_r_w, camera_r_x, camera_r_y, camera_r_z};
  259. __global RI_face *cur_face = &faces[face_index + face_array_offset_index];
  260. __global RI_renderable_face *cur_r_face = &renderable_faces[face_index * 2 + renderable_face_offset];
  261. __global RI_renderable_face *cur_r_split_face = &renderable_faces[face_index * 2 + renderable_face_offset + 1];
  262. cur_r_face->should_render = 0;
  263. cur_r_face->is_split = 0;
  264. cur_r_face->is_shrunk = 0;
  265. cur_r_face->is_transformed = 0;
  266. cur_r_face->min_screen_x = -32767; cur_r_face->max_screen_x = 32767;
  267. cur_r_face->min_screen_y = -32767; cur_r_face->max_screen_y = 32767;
  268. cur_r_split_face->should_render = 0;
  269. cur_r_split_face->is_split = 0;
  270. cur_r_split_face->is_shrunk = 0;
  271. cur_r_split_face->is_transformed = 0;
  272. cur_r_split_face->min_screen_x = -32767; cur_r_split_face->max_screen_x = 32767;
  273. cur_r_split_face->min_screen_y = -32767; cur_r_split_face->max_screen_y = 32767;
  274. // cur_r_face->material = current_actor.material;
  275. cur_r_face->position_0 = cur_face->position_0;
  276. cur_r_face->position_1 = cur_face->position_1;
  277. cur_r_face->position_2 = cur_face->position_2;
  278. cur_r_face->normal_0 = cur_face->normal_0;
  279. cur_r_face->normal_1 = cur_face->normal_1;
  280. cur_r_face->normal_2 = cur_face->normal_2;
  281. if (has_uvs){
  282. cur_r_face->uv_0 = cur_face->uv_0;
  283. cur_r_face->uv_1 = cur_face->uv_1;
  284. cur_r_face->uv_2 = cur_face->uv_2;
  285. }
  286. cur_r_face->texture.width = texture_width;
  287. cur_r_face->texture.height = texture_height;
  288. cur_r_face->texture.index = texture_index;
  289. cur_r_face->texture.frame_count = frame_count;
  290. cur_r_face->texture.current_frame = current_frame;
  291. cur_r_face->texture.frame_height = frame_height;
  292. cur_r_split_face->texture.width = texture_width;
  293. cur_r_split_face->texture.height = texture_height;
  294. cur_r_split_face->texture.index = texture_index;
  295. cur_r_split_face->texture.frame_count = frame_count;
  296. cur_r_split_face->texture.current_frame = current_frame;
  297. cur_r_split_face->texture.frame_height = frame_height;
  298. // scale
  299. global_vector_3_hadamard(&cur_r_face->position_0, current_actor_scale);
  300. global_vector_3_hadamard(&cur_r_face->position_1, current_actor_scale);
  301. global_vector_3_hadamard(&cur_r_face->position_2, current_actor_scale);
  302. // actor rotation
  303. global_quaternion_rotate(&cur_r_face->position_0, current_actor_rotation);
  304. global_quaternion_rotate(&cur_r_face->position_1, current_actor_rotation);
  305. global_quaternion_rotate(&cur_r_face->position_2, current_actor_rotation);
  306. global_quaternion_rotate(&cur_r_face->normal_0, current_actor_rotation);
  307. global_quaternion_rotate(&cur_r_face->normal_1, current_actor_rotation);
  308. global_quaternion_rotate(&cur_r_face->normal_2, current_actor_rotation);
  309. // object position
  310. global_vector_3_element_wise_add(&cur_r_face->position_0, current_actor_position);
  311. global_vector_3_element_wise_add(&cur_r_face->position_1, current_actor_position);
  312. global_vector_3_element_wise_add(&cur_r_face->position_2, current_actor_position);
  313. // camera position & rotation
  314. global_vector_3_element_wise_subtract(&cur_r_face->position_0, camera_position);
  315. global_vector_3_element_wise_subtract(&cur_r_face->position_1, camera_position);
  316. global_vector_3_element_wise_subtract(&cur_r_face->position_2, camera_position);
  317. global_quaternion_rotate(&cur_r_face->position_0, camera_rotation);
  318. global_quaternion_rotate(&cur_r_face->position_1, camera_rotation);
  319. global_quaternion_rotate(&cur_r_face->position_2, camera_rotation);
  320. __global RI_vector_3 *pos_0 = &cur_r_face->position_0;
  321. __global RI_vector_3 *pos_1 = &cur_r_face->position_1;
  322. __global RI_vector_3 *pos_2 = &cur_r_face->position_2;
  323. int is_0_clipped = pos_0->z <= min_clip;
  324. int is_1_clipped = pos_1->z <= min_clip;
  325. int is_2_clipped = pos_2->z <= min_clip;
  326. int clip_count = is_0_clipped + is_1_clipped + is_2_clipped;
  327. switch(clip_count){
  328. case 3: {// ignore polygon, it's behind the camera
  329. return;
  330. break;
  331. }
  332. case 2:{ // shrink poylgon
  333. RI_vector_3 unclipped_point, point_a, point_b;
  334. RI_vector_3 unclipped_normal, normal_a, normal_b;
  335. RI_vector_2 unclipped_uv, uv_a, uv_b;
  336. __global RI_vector_3 *result_a, *result_b;
  337. __global RI_vector_3 *n_result_a, *n_result_b;
  338. __global RI_vector_2 *u_result_a, *u_result_b;
  339. if (!is_0_clipped){
  340. unclipped_point = cur_r_face->position_0;
  341. point_a = cur_r_face->position_1;
  342. result_a = &cur_r_face->position_1;
  343. point_b = cur_r_face->position_2;
  344. result_b = &cur_r_face->position_2;
  345. unclipped_normal = cur_r_face->normal_0;
  346. normal_a = cur_r_face->normal_1;
  347. n_result_a = &cur_r_face->normal_1;
  348. normal_b = cur_r_face->normal_2;
  349. n_result_b = &cur_r_face->normal_2;
  350. unclipped_uv = cur_r_face->uv_0;
  351. u_result_a = &cur_r_face->uv_1;
  352. uv_a = cur_r_face->uv_1;
  353. uv_b = cur_r_face->uv_2;
  354. u_result_b = &cur_r_face->uv_2;
  355. }
  356. else if (!is_1_clipped){
  357. unclipped_point = cur_r_face->position_1;
  358. point_a = cur_r_face->position_2;
  359. result_a = &cur_r_face->position_2;
  360. point_b = cur_r_face->position_0;
  361. result_b = &cur_r_face->position_0;
  362. unclipped_normal = cur_r_face->normal_1;
  363. normal_a = cur_r_face->normal_2;
  364. n_result_a = &cur_r_face->normal_2;
  365. normal_b = cur_r_face->normal_0;
  366. n_result_b = &cur_r_face->normal_0;
  367. unclipped_uv = cur_r_face->uv_1;
  368. u_result_a = &cur_r_face->uv_2;
  369. uv_a = cur_r_face->uv_2;
  370. uv_b = cur_r_face->uv_0;
  371. u_result_b = &cur_r_face->uv_0;
  372. }
  373. else if (!is_2_clipped){
  374. unclipped_point = cur_r_face->position_2;
  375. point_a = cur_r_face->position_0;
  376. result_a = &cur_r_face->position_0;
  377. point_b = cur_r_face->position_1;
  378. result_b = &cur_r_face->position_1;
  379. unclipped_normal = cur_r_face->normal_2;
  380. normal_a = cur_r_face->normal_0;
  381. n_result_a = &cur_r_face->normal_0;
  382. normal_b = cur_r_face->normal_1;
  383. n_result_b = &cur_r_face->normal_1;
  384. unclipped_uv = cur_r_face->uv_2;
  385. u_result_a = &cur_r_face->uv_0;
  386. uv_a = cur_r_face->uv_0;
  387. uv_b = cur_r_face->uv_1;
  388. u_result_b = &cur_r_face->uv_1;
  389. }
  390. double fraction_a_to_unclip = clamp((min_clip - unclipped_point.z) / (point_a.z - unclipped_point.z), 0.0, 1.1);
  391. double fraction_b_to_unclip = clamp((min_clip - unclipped_point.z) / (point_b.z - unclipped_point.z), 0.0, 1.1);
  392. global_vector_3_lerp(unclipped_point, point_a, result_a, fraction_a_to_unclip);
  393. global_vector_3_lerp(unclipped_point, point_b, result_b, fraction_b_to_unclip);
  394. global_vector_3_lerp(unclipped_normal, normal_a, n_result_a, fraction_a_to_unclip);
  395. global_vector_3_lerp(unclipped_normal, normal_b, n_result_b, fraction_b_to_unclip);
  396. global_vector_2_lerp(unclipped_uv, uv_a, u_result_a, fraction_a_to_unclip);
  397. global_vector_2_lerp(unclipped_uv, uv_b, u_result_b, fraction_b_to_unclip);
  398. cur_r_face->is_shrunk = 1;
  399. cur_r_face->should_render = 1;
  400. break;
  401. }
  402. case 1: {// split polygon
  403. RI_vector_3 clipped_point, point_a, point_b;
  404. RI_vector_3 clipped_normal, normal_a, normal_b;
  405. RI_vector_2 clipped_uv, uv_a, uv_b;
  406. if (is_0_clipped){
  407. clipped_point = cur_r_face->position_0;
  408. point_a = cur_r_face->position_1;
  409. point_b = cur_r_face->position_2;
  410. clipped_normal = cur_r_face->normal_0;
  411. normal_a = cur_r_face->normal_1;
  412. normal_b = cur_r_face->normal_2;
  413. clipped_uv = cur_r_face->uv_0;
  414. uv_a = cur_r_face->uv_1;
  415. uv_b = cur_r_face->uv_2;
  416. }
  417. else if (is_1_clipped){
  418. clipped_point = cur_r_face->position_1;
  419. point_a = cur_r_face->position_2;
  420. point_b = cur_r_face->position_0;
  421. clipped_normal = cur_r_face->normal_1;
  422. normal_a = cur_r_face->normal_2;
  423. normal_b = cur_r_face->normal_0;
  424. clipped_uv = cur_r_face->uv_1;
  425. uv_a = cur_r_face->uv_2;
  426. uv_b = cur_r_face->uv_0;
  427. }
  428. else if (is_2_clipped){
  429. clipped_point = cur_r_face->position_2;
  430. point_a = cur_r_face->position_0;
  431. point_b = cur_r_face->position_1;
  432. clipped_normal = cur_r_face->normal_2;
  433. normal_a = cur_r_face->normal_0;
  434. normal_b = cur_r_face->normal_1;
  435. clipped_uv = cur_r_face->uv_2;
  436. uv_a = cur_r_face->uv_0;
  437. uv_b = cur_r_face->uv_1;
  438. }
  439. double fraction_a_to_clip = (min_clip - clipped_point.z) / (point_a.z - clipped_point.z);
  440. double fraction_b_to_clip = (min_clip - clipped_point.z) / (point_b.z - clipped_point.z);
  441. RI_vector_3 new_point_a, new_point_b; // the new points that move along the polygon's edge to match the z value of min_clip.
  442. RI_vector_3 new_normal_a, new_normal_b; // they come from the clipped point which was originally only 1
  443. RI_vector_2 new_uv_a, new_uv_b;
  444. vector_3_lerp(clipped_point, point_a, &new_point_a, fraction_a_to_clip);
  445. vector_3_lerp(clipped_point, point_b, &new_point_b, fraction_b_to_clip);
  446. vector_3_lerp(clipped_normal, normal_a, &new_normal_a, fraction_a_to_clip);
  447. vector_3_lerp(clipped_normal, normal_b, &new_normal_b, fraction_b_to_clip);
  448. vector_2_lerp(clipped_uv, uv_a, &new_uv_a, fraction_a_to_clip);
  449. vector_2_lerp(clipped_uv, uv_b, &new_uv_b, fraction_b_to_clip);
  450. // okay, now we have a quad (in clockwise order, point a, point b, new point b, new point a)
  451. // quads are easy to turn into tris >w<
  452. // cur_r_split_face->parent_actor = current_actor;
  453. // cur_r_split_face->material = cur_r_face->material;
  454. cur_r_face->position_0 = point_a;
  455. cur_r_face->position_1 = point_b;
  456. cur_r_face->position_2 = new_point_a;
  457. cur_r_face->normal_0 = normal_a;
  458. cur_r_face->normal_1 = normal_b;
  459. cur_r_face->normal_2 = new_normal_a;
  460. cur_r_face->uv_0 = uv_a;
  461. cur_r_face->uv_1 = uv_b;
  462. cur_r_face->uv_2 = new_uv_a;
  463. cur_r_split_face->position_0 = point_b;
  464. cur_r_split_face->position_1 = new_point_b;
  465. cur_r_split_face->position_2 = new_point_a;
  466. cur_r_split_face->normal_0 = normal_b;
  467. cur_r_split_face->normal_1 = new_normal_b;
  468. cur_r_split_face->normal_2 = new_normal_a;
  469. cur_r_split_face->uv_0 = uv_b;
  470. cur_r_split_face->uv_1 = new_uv_b;
  471. cur_r_split_face->uv_2 = new_uv_a;
  472. cur_r_split_face->position_0.x = cur_r_split_face->position_0.x / cur_r_split_face->position_0.z * horizontal_fov_factor;
  473. cur_r_split_face->position_0.y = cur_r_split_face->position_0.y / cur_r_split_face->position_0.z * vertical_fov_factor;
  474. cur_r_split_face->position_1.x = cur_r_split_face->position_1.x / cur_r_split_face->position_1.z * horizontal_fov_factor;
  475. cur_r_split_face->position_1.y = cur_r_split_face->position_1.y / cur_r_split_face->position_1.z * vertical_fov_factor;
  476. cur_r_split_face->position_2.x = cur_r_split_face->position_2.x / cur_r_split_face->position_2.z * horizontal_fov_factor;
  477. cur_r_split_face->position_2.y = cur_r_split_face->position_2.y / cur_r_split_face->position_2.z * vertical_fov_factor;
  478. cur_r_split_face->min_screen_x = cur_r_split_face->position_0.x;
  479. if (cur_r_split_face->position_1.x < cur_r_split_face->min_screen_x) cur_r_split_face->min_screen_x = cur_r_split_face->position_1.x;
  480. if (cur_r_split_face->position_2.x < cur_r_split_face->min_screen_x) cur_r_split_face->min_screen_x = cur_r_split_face->position_2.x;
  481. cur_r_split_face->min_screen_x = max(cur_r_split_face->min_screen_x, (short)(-width / 2));
  482. cur_r_split_face->max_screen_x = cur_r_split_face->position_0.x;
  483. if (cur_r_split_face->position_1.x > cur_r_split_face->max_screen_x) cur_r_split_face->max_screen_x = cur_r_split_face->position_1.x;
  484. if (cur_r_split_face->position_2.x > cur_r_split_face->max_screen_x) cur_r_split_face->max_screen_x = cur_r_split_face->position_2.x;
  485. cur_r_split_face->max_screen_x = min(cur_r_split_face->max_screen_x, (short)(width / 2));
  486. cur_r_split_face->min_screen_y = cur_r_split_face->position_0.y;
  487. if (cur_r_split_face->position_1.y < cur_r_split_face->min_screen_y) cur_r_split_face->min_screen_y = cur_r_split_face->position_1.y;
  488. if (cur_r_split_face->position_2.y < cur_r_split_face->min_screen_y) cur_r_split_face->min_screen_y = cur_r_split_face->position_2.y;
  489. cur_r_split_face->min_screen_y = max(cur_r_split_face->min_screen_y, (short)(-height / 2));
  490. cur_r_split_face->max_screen_y = cur_r_split_face->position_0.y;
  491. if (cur_r_split_face->position_1.y > cur_r_split_face->max_screen_y) cur_r_split_face->max_screen_y = cur_r_split_face->position_1.y;
  492. if (cur_r_split_face->position_2.y > cur_r_split_face->max_screen_y) cur_r_split_face->max_screen_y = cur_r_split_face->position_2.y;
  493. cur_r_split_face->max_screen_y = min(cur_r_split_face->max_screen_y, (short)(height / 2));
  494. cur_r_split_face->should_render = 1;
  495. cur_r_face->should_render = 1;
  496. cur_r_split_face->texture.width = texture_width;
  497. cur_r_split_face->texture.height = texture_height;
  498. cur_r_split_face->texture.index = texture_index;
  499. cur_r_split_face->is_split = 1;
  500. cur_r_face->is_transformed = 1;
  501. break;
  502. }
  503. case 0:{ // no issues, ignore
  504. cur_r_face->should_render = 1;
  505. break;
  506. }
  507. }
  508. // current_actor.material->vertex_shader(&cur_r_face->position_0, &cur_r_face->position_1, &cur_r_face->position_2, horizontal_fov_factor, vertical_fov_factor);
  509. cur_r_face->position_0.x = cur_r_face->position_0.x / cur_r_face->position_0.z * horizontal_fov_factor;
  510. cur_r_face->position_0.y = cur_r_face->position_0.y / cur_r_face->position_0.z * vertical_fov_factor;
  511. cur_r_face->position_1.x = cur_r_face->position_1.x / cur_r_face->position_1.z * horizontal_fov_factor;
  512. cur_r_face->position_1.y = cur_r_face->position_1.y / cur_r_face->position_1.z * vertical_fov_factor;
  513. cur_r_face->position_2.x = cur_r_face->position_2.x / cur_r_face->position_2.z * horizontal_fov_factor;
  514. cur_r_face->position_2.y = cur_r_face->position_2.y / cur_r_face->position_2.z * vertical_fov_factor;
  515. cur_r_face->min_screen_x = pos_0->x;
  516. if (pos_1->x < cur_r_face->min_screen_x) cur_r_face->min_screen_x = pos_1->x;
  517. if (pos_2->x < cur_r_face->min_screen_x) cur_r_face->min_screen_x = pos_2->x;
  518. cur_r_face->min_screen_x = max(cur_r_face->min_screen_x, (short)(-width / 2));
  519. cur_r_face->max_screen_x = pos_0->x;
  520. if (pos_1->x > cur_r_face->max_screen_x) cur_r_face->max_screen_x = pos_1->x;
  521. if (pos_2->x > cur_r_face->max_screen_x) cur_r_face->max_screen_x = pos_2->x;
  522. cur_r_face->max_screen_x = min(cur_r_face->max_screen_x, (short)(width / 2));
  523. cur_r_face->min_screen_y = pos_0->y;
  524. if (pos_1->y < cur_r_face->min_screen_y) cur_r_face->min_screen_y = pos_1->y;
  525. if (pos_2->y < cur_r_face->min_screen_y) cur_r_face->min_screen_y = pos_2->y;
  526. cur_r_face->min_screen_y = max(cur_r_face->min_screen_y, (short)(-height / 2));
  527. cur_r_face->max_screen_y = pos_0->y;
  528. if (pos_1->y > cur_r_face->max_screen_y) cur_r_face->max_screen_y = pos_1->y;
  529. if (pos_2->y > cur_r_face->max_screen_y) cur_r_face->max_screen_y = pos_2->y;
  530. cur_r_face->max_screen_y = min(cur_r_face->max_screen_y, (short)(height / 2));
  531. uint tile_width = tiles[0];
  532. uint tile_height = tiles[1];
  533. uint faces_per_tile = tiles[2];
  534. uint number_of_horizontal_tiles = tiles[3];
  535. uint number_of_vertical_tiles = tiles[4];
  536. uint tile_x_min = clamp(
  537. (uint)floor((cur_r_face->min_screen_x + 0.5f * width) / tile_width),
  538. 0u,
  539. number_of_horizontal_tiles - 1
  540. );
  541. uint tile_x_max = clamp(
  542. (uint)floor((cur_r_face->max_screen_x + 0.5f * width) / tile_width),
  543. 0u,
  544. number_of_horizontal_tiles - 1
  545. );
  546. uint tile_y_min = clamp(
  547. (uint)floor((cur_r_face->min_screen_y + 0.5f * height) / tile_height),
  548. 0u,
  549. number_of_vertical_tiles - 1
  550. );
  551. uint tile_y_max = clamp(
  552. (uint)floor((cur_r_face->max_screen_y + 0.5f * height) / tile_height),
  553. 0u,
  554. number_of_vertical_tiles - 1
  555. );
  556. if (cur_r_split_face->should_render){
  557. uint split_tile_x_min = clamp(
  558. (uint)floor((cur_r_split_face->min_screen_x + 0.5f * width) / tile_width),
  559. 0u,
  560. number_of_horizontal_tiles - 1
  561. );
  562. uint split_tile_x_max = clamp(
  563. (uint)floor((cur_r_split_face->max_screen_x + 0.5f * width) / tile_width),
  564. 0u,
  565. number_of_horizontal_tiles - 1
  566. );
  567. uint split_tile_y_min = clamp(
  568. (uint)floor((cur_r_split_face->min_screen_y + 0.5f * height) / tile_height),
  569. 0u,
  570. number_of_vertical_tiles - 1
  571. );
  572. uint split_tile_y_max = clamp(
  573. (uint)floor((cur_r_split_face->max_screen_y + 0.5f * height) / tile_height),
  574. 0u,
  575. number_of_vertical_tiles - 1
  576. );
  577. tile_x_min = min(tile_x_min, split_tile_x_min);
  578. tile_x_max = max(tile_x_max, split_tile_x_max);
  579. tile_y_min = min(tile_y_min, split_tile_y_min);
  580. tile_y_max = max(tile_y_max, split_tile_y_max);
  581. }
  582. for (uint y = tile_y_min; y <= tile_y_max; y++){
  583. for (uint x = tile_x_min; x <= tile_x_max; x++){
  584. uint tile_array_index = 5 + (y * number_of_horizontal_tiles + x) * (1 + faces_per_tile);
  585. if (cur_r_split_face->should_render){
  586. uint num_faces_in_cur_tile = atomic_fetch_add((volatile __global atomic_uint*)&tiles[tile_array_index], 2);
  587. tiles[tile_array_index + num_faces_in_cur_tile + 1] = face_index * 2 + renderable_face_offset;
  588. tiles[tile_array_index + num_faces_in_cur_tile + 2] = face_index * 2 + renderable_face_offset + 1;
  589. }
  590. else {
  591. uint num_faces_in_cur_tile = atomic_fetch_add((volatile __global atomic_uint*)&tiles[tile_array_index], 1);
  592. tiles[tile_array_index + num_faces_in_cur_tile + 1] = face_index * 2 + renderable_face_offset;
  593. }
  594. }
  595. }
  596. return;
  597. }
  598. __kernel void rasterizer(__global RI_renderable_face *renderable_faces, __global uint* textures, __global uint *frame_buffer, int width, int height, int half_width, int half_height, int number_of_renderable_faces, int number_of_split_renderable_faces, __global uint* tiles){
  599. int pixel_x = get_global_id(0); if (pixel_x >= width) return;
  600. int pixel_y = get_global_id(1); if (pixel_y >= height) return;
  601. int idx = (height - pixel_y) * width + pixel_x;
  602. int x = pixel_x - half_width;
  603. int y = pixel_y - half_height;
  604. uint tile_width = tiles[0];
  605. uint tile_height = tiles[1];
  606. uint faces_per_tile = tiles[2];
  607. uint number_of_horizontal_tiles = tiles[3];
  608. uint number_of_vertical_tiles = tiles[4];
  609. uint tile_x = fmin(fmax(floor((float)(pixel_x / tile_width)), 0), number_of_horizontal_tiles);
  610. uint tile_y = fmin(fmax(floor((float)(pixel_y / tile_height)), 0), number_of_vertical_tiles);
  611. double z = INFINITY;
  612. uint pixel_color = 0x11111111;
  613. uint tile_array_index = 5 + (tile_y * number_of_horizontal_tiles + tile_x) * (1 + faces_per_tile);
  614. uint num_faces_in_cur_tile = tiles[tile_array_index];
  615. // debug tiles
  616. // if (num_faces_in_cur_tile > 0) pixel_color = 0x00AA00FF;
  617. for (int face_i = 0; face_i < num_faces_in_cur_tile; ++face_i){
  618. __global RI_renderable_face *current_face = &renderable_faces[tiles[tile_array_index + face_i + 1]];
  619. if (!current_face->should_render) continue;
  620. RI_vector_2 uv_0 = current_face->uv_0;
  621. RI_vector_2 uv_1 = current_face->uv_1;
  622. RI_vector_2 uv_2 = current_face->uv_2;
  623. RI_vector_3 normal_0 = current_face->normal_0;
  624. RI_vector_3 normal_1 = current_face->normal_1;
  625. RI_vector_3 normal_2 = current_face->normal_2;
  626. RI_vector_3 pos_0 = current_face->position_0;
  627. RI_vector_3 pos_1 = current_face->position_1;
  628. RI_vector_3 pos_2 = current_face->position_2;
  629. if (x < current_face->min_screen_x || x > current_face->max_screen_x || y < current_face->min_screen_y || y > current_face->max_screen_y)
  630. continue;
  631. double denominator, w0, w1, w2;
  632. denominator = (pos_1.y - pos_2.y) * (pos_0.x - pos_2.x) + (pos_2.x - pos_1.x) * (pos_0.y - pos_2.y);
  633. if (denominator >= 0) continue;
  634. w0 = ((pos_1.y - pos_2.y) * (x - pos_2.x) + (pos_2.x - pos_1.x) * (y - pos_2.y)) / denominator;
  635. w1 = ((pos_2.y - pos_0.y) * (x - pos_0.x) + (pos_0.x - pos_2.x) * (y - pos_0.y)) / denominator;
  636. w2 = 1.0 - w0 - w1;
  637. double w_over_z = (w0 / pos_0.z + w1 / pos_1.z + w2 / pos_2.z);
  638. double interpolated_z = 1.0 / w_over_z;
  639. if (!(w0 >= 0 && w1 >= 0 && w2 >= 0)){
  640. continue;
  641. }
  642. if (interpolated_z >= z){
  643. continue;
  644. }
  645. double alpha = 1;
  646. float ux = (w0 * (uv_0.x / pos_0.z) + w1 * (uv_1.x / pos_1.z) + w2 * (uv_2.x / pos_2.z)) / w_over_z;
  647. float uy = (w0 * (uv_0.y / pos_0.z) + w1 * (uv_1.y / pos_1.z) + w2 * (uv_2.y / pos_2.z)) / w_over_z;
  648. RI_vector_3 interpolated_normal = {0};
  649. uint texel_x = current_face->texture.width * ux;
  650. uint texel_y = current_face->texture.frame_height * uy + current_face->texture.frame_height * (current_face->texture.current_frame % current_face->texture.frame_count);
  651. uint texel_index = current_face->texture.index +
  652. texel_y * current_face->texture.width + texel_x;
  653. if (textures[texel_index] & 0x000000FF){ // skip any pixel that is completly transparent
  654. pixel_color = textures[texel_index];
  655. z = interpolated_z;
  656. }
  657. // debug clipped tris
  658. // pixel_color = 0x777777FF;
  659. // if (current_face->is_split) pixel_color |= 0x00FF00FF;
  660. // if (current_face->is_shrunk) pixel_color |= 0xFFFFFFFF;
  661. // if (current_face->is_transformed) pixel_color |= 0xFF00FFFF;
  662. }
  663. // debug tiles
  664. // if (pixel_x % tile_width == 0 || pixel_y % tile_height == 0) pixel_color = 0xFFFFFFFF;
  665. frame_buffer[idx] = pixel_color;
  666. return;
  667. }