Removed some of the C'ness, moving stuff into the kernel

master
MitchellHansen 9 years ago
parent 515c0767a9
commit c5d73bd5fd

@ -13,8 +13,6 @@
#define SUCCESS 0 #define SUCCESS 0
#define FAILURE 1 #define FAILURE 1
float elap_time() { float elap_time() {
static __int64 start = 0; static __int64 start = 0;
static __int64 frequency = 0; static __int64 frequency = 0;
@ -71,77 +69,77 @@ int main(int argc, char* argv[])
// ============================== OpenCL Setup ================================================================== // ============================== OpenCL Setup ==================================================================
/*Step1: Getting platforms and choose an available one.*/ // Get the platforms
cl_uint numPlatforms; //the NO. of platforms cl_uint numPlatforms;
cl_platform_id platform = NULL; //the chosen platform cl_platform_id platform = NULL;
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); // Retrieve the number of platforms
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
std::cout << "Error: Getting platforms!" << std::endl; std::cout << "Error: Getting platforms!" << std::endl;
return FAILURE; return FAILURE;
} }
// Choose the first available platform // Choose the first available platform
if(numPlatforms > 0) if(numPlatforms > 0) {
{ cl_platform_id* platforms = new cl_platform_id[numPlatforms];
cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id)); status = clGetPlatformIDs(numPlatforms, platforms, NULL); // Now populate the array with the platforms
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
platform = platforms[0]; platform = platforms[0];
free(platforms); delete platforms;
} }
/*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
cl_uint numDevices = 0; cl_uint numDevices = 0;
cl_device_id *devices; cl_device_id *devices;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (numDevices == 0) { //no GPU available. if (numDevices == 0) { //no GPU available.
std::cout << "No GPU device available." << std::endl; std::cout << "No GPU device available." << std::endl;
std::cout << "Choose CPU as default device." << std::endl; std::cout << "Choose CPU as default device." << std::endl;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); devices = new cl_device_id[numDevices];
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
} }
else { else {
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); devices = new cl_device_id[numDevices];
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
} }
/*Step 3: Create context.*/
cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL); cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
/*Step 4: Creating command queue associate with the context.*/
cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
// ============================== Kernel Compilation, Setup ==================================================== // ============================== Kernel Compilation, Setup ====================================================
/*Step 5: Create program object */ // Read the kernel from the file to a string
const char *filename = "HelloWorld_Kernel.cl"; const char *filename = "conway_kernel.cl";
std::string sourceStr; std::string sourceStr;
status = convertToString(filename, sourceStr); status = convertToString(filename, sourceStr);
// Create a program with the source
const char *source = sourceStr.c_str(); const char *source = sourceStr.c_str();
size_t sourceSize[] = {strlen(source)}; size_t sourceSize[] = {strlen(source)};
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
// Build program and set kernel // Build the program
status=clBuildProgram(program, 1,devices,NULL,NULL,NULL); status = clBuildProgram(program, 1,devices,NULL,NULL,NULL);
// If the build failed
if (status == CL_BUILD_PROGRAM_FAILURE) { if (status == CL_BUILD_PROGRAM_FAILURE) {
// Determine the size of the log // Determine the size of the log
size_t log_size; size_t log_size;
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log // Allocate memory for the log
char *log = (char *)malloc(log_size); char *log = new char[log_size];
// Get the log // Get the log
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log // Print the log
printf("%s\n", log); std::cout << log << std::endl;
} }
cl_kernel kernel = clCreateKernel(program, "helloworld", NULL); // Now create the kernel
cl_kernel kernel = clCreateKernel(program, "conway", NULL);
// ======================================= Setup grid ========================================================= // ======================================= Setup grid =========================================================
@ -149,26 +147,42 @@ int main(int argc, char* argv[])
std::mt19937 rng(time(NULL)); std::mt19937 rng(time(NULL));
std::uniform_int_distribution<int> rgen(0, 12); // 25% chance std::uniform_int_distribution<int> rgen(0, 12); // 25% chance
// Init the grid // Init the grids
char* grid = new char[GRID_WIDTH * GRID_HEIGHT* 2]; unsigned char* front_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT* 2];
for (int i = 0; i < 1000 * 1000 * 2; i += 2) { for (int i = 0; i < 1000 * 1000; i += 2) {
if (rgen(rng) == 1) { if (rgen(rng) == 1) {
grid[i] = 1; front_grid[i] = 1;
grid[i + 1] = 1;
} }
else { else {
grid[i] = 0; front_grid[i] = 0;
grid[i + 1] = 0;
} }
} }
unsigned char* rear_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT * 2];
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) {
rear_grid[i] = front_grid[i];
}
// ====================================== Setup SFML ========================================================== // ====================================== Setup SFML ==========================================================
// Spites for drawing, probably where the biggest slowdown is sf::Uint8* asdf = rear_grid;
sf::RectangleShape live_node;
live_node.setFillColor(sf::Color(145, 181, 207)); sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4];
live_node.setSize(sf::Vector2f(1, 1));
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
int p = i / 2;
pixel_array[p * 4] = 49; // R?
pixel_array[p * 4 + 1] = 68; // G?
pixel_array[p * 4 + 2] = 72; // B?
pixel_array[p * 4 + 3] = 255; // A?
}
char* arr = new char[1000 * 1000];
// Init window, and loop data // Init window, and loop data
sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games"); sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games");
@ -179,27 +193,18 @@ int main(int argc, char* argv[])
int err = 0; int err = 0;
cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, &err); cl_mem frontBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)front_grid, &err);
cl_mem rearBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, &err);
cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err); cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err);
cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err); cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err);
cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err); cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&frontBuffer);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer); status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer);
sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4];
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
int p = i / 2;
pixel_array[p * 4] = 49; // R?
pixel_array[p * 4 + 1] = 68; // G?
pixel_array[p * 4 + 2] = 72; // B?
pixel_array[p * 4 + 3] = 255; // A?
}
sf::Texture texture; sf::Texture texture;
texture.create(WINDOW_X, WINDOW_Y); texture.create(WINDOW_X, WINDOW_Y);
@ -231,7 +236,7 @@ int main(int argc, char* argv[])
// ======================================= OpenCL Shtuff ============================================= // ======================================= OpenCL Shtuff =============================================
// Update the data in GPU memory // Update the data in GPU memory
status = clEnqueueWriteBuffer(commandQueue, inputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, NULL, 0, NULL); //status = clEnqueueWriteBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, NULL, 0, NULL);
// Work size, for each y line // Work size, for each y line
size_t global_work_size[1] = { WORKER_SIZE }; size_t global_work_size[1] = { WORKER_SIZE };
@ -240,30 +245,8 @@ int main(int argc, char* argv[])
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
// Get output, put back into grid // Get output, put back into grid
status = clEnqueueReadBuffer(commandQueue, inputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, 0, NULL, NULL); status = clEnqueueReadBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, 0, NULL, NULL);
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
int p = i / 2;
if (grid[i + 1] == 1) {
pixel_array[p * 4] = 255; // R?
pixel_array[p * 4 + 1] = 255; // G?
pixel_array[p * 4 + 2] = 255; // B?
pixel_array[p * 4 + 3] = 255; // A?
}
else if (grid[i] == 1){
pixel_array[p * 4] = 49; // R?
pixel_array[p * 4 + 1] = 68; // G?
pixel_array[p * 4 + 2] = 72; // B?
pixel_array[p * 4 + 3] = 255; // A?
}
grid[i] = grid[i + 1];
}
texture.update(pixel_array); texture.update(pixel_array);
window.draw(sprite); window.draw(sprite);
@ -275,7 +258,7 @@ int main(int argc, char* argv[])
// Temporary // Temporary
status = clReleaseMemObject(inputBuffer); status = clReleaseMemObject(frontBuffer);
status = clReleaseMemObject(workerCountBuffer); status = clReleaseMemObject(workerCountBuffer);
status = clReleaseMemObject(gridWidthBuffer); status = clReleaseMemObject(gridWidthBuffer);
status = clReleaseMemObject(gridHeightBuffer); status = clReleaseMemObject(gridHeightBuffer);

@ -117,7 +117,7 @@
<ImportLibrary>$(SolutionDir)bin/x86/Debug/HelloWorld.lib</ImportLibrary> <ImportLibrary>$(SolutionDir)bin/x86/Debug/HelloWorld.lib</ImportLibrary>
</Link> </Link>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
@ -144,7 +144,7 @@
<AdditionalOptions> /machine:x64 /debug %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions> /machine:x64 /debug %(AdditionalOptions)</AdditionalOptions>
</Link> </Link>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
@ -182,7 +182,7 @@
<LinkLibraryDependencies>false</LinkLibraryDependencies> <LinkLibraryDependencies>false</LinkLibraryDependencies>
</ProjectReference> </ProjectReference>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
@ -219,15 +219,15 @@
<AdditionalOptions> /machine:x64 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions> /machine:x64 %(AdditionalOptions)</AdditionalOptions>
</Link> </Link>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="HelloWorld.cpp" /> <ClCompile Include="Conway.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="HelloWorld_Kernel.cl" /> <None Include="conway_kernel.cl" />
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">

@ -1,4 +1,4 @@
__kernel void helloworld(__global char* in, __global int* num_workers, __global int* grid_width, __global int* grid_height) __kernel void conway(__global unsigned char* front_grid, __global unsigned char* rear_grid, __global int* num_workers, __global int* grid_width, __global int* grid_height)
{ {
// Calculate the start and end range that this worker will be calculating

@ -117,7 +117,7 @@
<ImportLibrary>$(SolutionDir)bin/x86/Debug/HelloWorld.lib</ImportLibrary> <ImportLibrary>$(SolutionDir)bin/x86/Debug/HelloWorld.lib</ImportLibrary>
</Link> </Link>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
@ -144,7 +144,7 @@
<AdditionalOptions> /machine:x64 /debug %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions> /machine:x64 /debug %(AdditionalOptions)</AdditionalOptions>
</Link> </Link>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
@ -182,7 +182,7 @@
<LinkLibraryDependencies>false</LinkLibraryDependencies> <LinkLibraryDependencies>false</LinkLibraryDependencies>
</ProjectReference> </ProjectReference>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
@ -219,15 +219,15 @@
<AdditionalOptions> /machine:x64 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions> /machine:x64 %(AdditionalOptions)</AdditionalOptions>
</Link> </Link>
<PostBuildEvent> <PostBuildEvent>
<Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
</Command> </Command>
</PostBuildEvent> </PostBuildEvent>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="HelloWorld.cpp" /> <ClCompile Include="Conway.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="HelloWorld_Kernel.cl" /> <None Include="conway_kernel.cl" />
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">

Loading…
Cancel
Save