apple_hello_world.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. //
  2. // File: hello.c
  3. //
  4. // Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which
  5. // calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
  6. // floating point values.
  7. //
  8. //
  9. // Version: <1.0>
  10. //
  11. // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
  12. // in consideration of your agreement to the following terms, and your use,
  13. // installation, modification or redistribution of this Apple software
  14. // constitutes acceptance of these terms. If you do not agree with these
  15. // terms, please do not use, install, modify or redistribute this Apple
  16. // software.
  17. //
  18. // In consideration of your agreement to abide by the following terms, and
  19. // subject to these terms, Apple grants you a personal, non - exclusive
  20. // license, under Apple's copyrights in this original Apple software ( the
  21. // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
  22. // Software, with or without modifications, in source and / or binary forms;
  23. // provided that if you redistribute the Apple Software in its entirety and
  24. // without modifications, you must retain this notice and the following text
  25. // and disclaimers in all such redistributions of the Apple Software. Neither
  26. // the name, trademarks, service marks or logos of Apple Inc. may be used to
  27. // endorse or promote products derived from the Apple Software without specific
  28. // prior written permission from Apple. Except as expressly stated in this
  29. // notice, no other rights or licenses, express or implied, are granted by
  30. // Apple herein, including but not limited to any patent rights that may be
  31. // infringed by your derivative works or by other works in which the Apple
  32. // Software may be incorporated.
  33. //
  34. // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
  35. // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
  36. // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
  37. // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
  38. // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
  39. //
  40. // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
  41. // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  42. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  43. // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
  44. // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
  45. // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
  46. // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  47. //
  48. // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
  49. //
  50. ////////////////////////////////////////////////////////////////////////////////
  51. #include <fcntl.h>
  52. #include <stdio.h>
  53. #include <stdlib.h>
  54. #include <string.h>
  55. #include <math.h>
  56. #include <unistd.h>
  57. #include <sys/types.h>
  58. #include <sys/stat.h>
  59. #include <CL/cl.h>
  60. ////////////////////////////////////////////////////////////////////////////////
  61. // Use a static data size for simplicity
  62. //
  63. #define DATA_SIZE (1024)
  64. ////////////////////////////////////////////////////////////////////////////////
  65. // Simple compute kernel which computes the square of an input array
  66. //
  // OpenCL C source text of the "square" kernel, handed to
  // clCreateProgramWithSource at run time.  Each work-item whose global id is
  // below `count` writes input[id] * input[id] to output[id].
  //
  // Adjacent string literals are concatenated by the compiler, so no
  // backslash line continuations are needed.
  const char *KernelSource =
      "\n"
      "__kernel void square( \n"
      " __global float* input, \n"
      " __global float* output, \n"
      " const unsigned int count) \n"
      "{ \n"
      " int i = get_global_id(0); \n"
      " if(i < count) \n"
      " output[i] = input[i] * input[i]; \n"
      "} \n"
      "\n";
  78. ////////////////////////////////////////////////////////////////////////////////
  79. int main(int argc, char** argv)
  80. {
  81. int err; // error code returned from api calls
  82. float data[DATA_SIZE]; // original data set given to device
  83. float results[DATA_SIZE]; // results returned from device
  84. unsigned int correct; // number of correct results returned
  85. size_t global; // global domain size for our calculation
  86. size_t local; // local domain size for our calculation
  87. cl_device_id device_id; // compute device id
  88. cl_context context; // compute context
  89. cl_command_queue commands; // compute command queue
  90. cl_program program; // compute program
  91. cl_kernel kernel; // compute kernel
  92. cl_mem input; // device memory used for the input array
  93. cl_mem output; // device memory used for the output array
  94. // Fill our data set with random float values
  95. //
  96. int i = 0;
  97. unsigned int count = DATA_SIZE;
  98. for(i = 0; i < count; i++)
  99. data[i] = rand() / (float)RAND_MAX;
  100. // Connect to a compute device
  101. //
  102. int gpu = 1;
  103. err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
  104. if (err != CL_SUCCESS)
  105. {
  106. printf("Error: Failed to create a device group!\n");
  107. return EXIT_FAILURE;
  108. }
  109. // Create a compute context
  110. //
  111. context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
  112. if (!context)
  113. {
  114. printf("Error: Failed to create a compute context!\n");
  115. return EXIT_FAILURE;
  116. }
  117. // Create a command commands
  118. //
  119. commands = clCreateCommandQueue(context, device_id, 0, &err);
  120. if (!commands)
  121. {
  122. printf("Error: Failed to create a command commands!\n");
  123. return EXIT_FAILURE;
  124. }
  125. // Create the compute program from the source buffer
  126. //
  127. program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
  128. if (!program)
  129. {
  130. printf("Error: Failed to create compute program!\n");
  131. return EXIT_FAILURE;
  132. }
  133. // Build the program executable
  134. //
  135. err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  136. if (err != CL_SUCCESS)
  137. {
  138. size_t len;
  139. char buffer[2048];
  140. printf("Error: Failed to build program executable!\n");
  141. clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
  142. printf("%s\n", buffer);
  143. exit(1);
  144. }
  145. // Create the compute kernel in the program we wish to run
  146. //
  147. kernel = clCreateKernel(program, "square", &err);
  148. if (!kernel || err != CL_SUCCESS)
  149. {
  150. printf("Error: Failed to create compute kernel!\n");
  151. exit(1);
  152. }
  153. // Create the input and output arrays in device memory for our calculation
  154. //
  155. input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
  156. output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
  157. if (!input || !output)
  158. {
  159. printf("Error: Failed to allocate device memory!\n");
  160. exit(1);
  161. }
  162. // Write our data set into the input array in device memory
  163. //
  164. err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
  165. if (err != CL_SUCCESS)
  166. {
  167. printf("Error: Failed to write to source array!\n");
  168. exit(1);
  169. }
  170. // Set the arguments to our compute kernel
  171. //
  172. err = 0;
  173. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
  174. err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
  175. err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
  176. if (err != CL_SUCCESS)
  177. {
  178. printf("Error: Failed to set kernel arguments! %d\n", err);
  179. exit(1);
  180. }
  181. // Get the maximum work group size for executing the kernel on the device
  182. //
  183. err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
  184. if (err != CL_SUCCESS)
  185. {
  186. printf("Error: Failed to retrieve kernel work group info! %d\n", err);
  187. exit(1);
  188. }
  189. // Execute the kernel over the entire range of our 1d input data set
  190. // using the maximum number of work group items for this device
  191. //
  192. global = count;
  193. err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
  194. if (err)
  195. {
  196. printf("Error: Failed to execute kernel!\n");
  197. return EXIT_FAILURE;
  198. }
  199. // Wait for the command commands to get serviced before reading back results
  200. //
  201. clFinish(commands);
  202. // Read back the results from the device to verify the output
  203. //
  204. err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
  205. if (err != CL_SUCCESS)
  206. {
  207. printf("Error: Failed to read output array! %d\n", err);
  208. exit(1);
  209. }
  210. // Validate our results
  211. //
  212. correct = 0;
  213. for(i = 0; i < count; i++)
  214. {
  215. if(results[i] == data[i] * data[i])
  216. correct++;
  217. }
  218. // Print a brief summary detailing the results
  219. //
  220. printf("Computed '%d/%d' correct values!\n", correct, count);
  221. // Shutdown and cleanup
  222. //
  223. clReleaseMemObject(input);
  224. clReleaseMemObject(output);
  225. clReleaseProgram(program);
  226. clReleaseKernel(kernel);
  227. clReleaseCommandQueue(commands);
  228. clReleaseContext(context);
  229. return 0;
  230. }