I think that the GPU is computing things correctly now, but I'm having trouble getting data back out of the GPU. More tomorrow.

master
MitchellHansen 9 years ago
parent 5c593695e8
commit 7d1cc67143

@ -8,11 +8,27 @@
#include <random> #include <random>
#include <ctime> #include <ctime>
#include <SFML/Graphics.hpp> #include <SFML/Graphics.hpp>
#include <windows.h>
#define SUCCESS 0 #define SUCCESS 0
#define FAILURE 1 #define FAILURE 1
using namespace std;
float elap_time() {
static __int64 start = 0;
static __int64 frequency = 0;
if (start == 0) {
QueryPerformanceCounter((LARGE_INTEGER*)&start);
QueryPerformanceFrequency((LARGE_INTEGER*)&frequency);
return 0.0f;
}
__int64 counter = 0;
QueryPerformanceCounter((LARGE_INTEGER*)&counter);
return (float)((counter - start) / double(frequency));
}
/* convert the kernel file into a string */ /* convert the kernel file into a string */
int convertToString(const char *filename, std::string& s) int convertToString(const char *filename, std::string& s)
@ -41,43 +57,30 @@ int convertToString(const char *filename, std::string& s)
delete[] str; delete[] str;
return 0; return 0;
} }
cout<<"Error: failed to open file\n:"<<filename<<endl; std::cout << "Error: failed to open file\n:" << filename << std::endl;
return FAILURE; return FAILURE;
} }
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
int WINDOW_X = 1000;
int WINDOW_Y = 1000;
int GRID_WIDTH = 1000;
int GRID_HEIGHT = 1000;
int WORKER_SIZE = 1000;
// ============================== OpenCL Setup ==================================================================
// 1000 x 1000 grid
std::mt19937 rng(time(NULL));
std::uniform_int_distribution<int> rgen(0, 4); // 25% chance
char* grid = new char[1000 * 1000 * 2];
for (int i = 0; i < 1000 * 1000 * 2; i += 2) {
if (rgen(rng) == 1) {
grid[i] = 1;
grid[i + 1] = 1;
}
else {
grid[i] = 0;
grid[i + 1] = 0;
}
}
/*Step1: Getting platforms and choose an available one.*/ /*Step1: Getting platforms and choose an available one.*/
cl_uint numPlatforms; //the NO. of platforms cl_uint numPlatforms; //the NO. of platforms
cl_platform_id platform = NULL; //the chosen platform cl_platform_id platform = NULL; //the chosen platform
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) if (status != CL_SUCCESS) {
{ std::cout << "Error: Getting platforms!" << std::endl;
cout << "Error: Getting platforms!" << endl;
return FAILURE; return FAILURE;
} }
/*For clarity, choose the first available platform. */ // Choose the first available platform
if(numPlatforms > 0) if(numPlatforms > 0)
{ {
cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id)); cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id));
@ -90,16 +93,14 @@ int main(int argc, char* argv[])
cl_uint numDevices = 0; cl_uint numDevices = 0;
cl_device_id *devices; cl_device_id *devices;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (numDevices == 0) //no GPU available. if (numDevices == 0) { //no GPU available.
{ std::cout << "No GPU device available." << std::endl;
cout << "No GPU device available." << endl; std::cout << "Choose CPU as default device." << std::endl;
cout << "Choose CPU as default device." << endl;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
} }
else else {
{
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
} }
@ -111,51 +112,73 @@ int main(int argc, char* argv[])
/*Step 4: Creating command queue associate with the context.*/ /*Step 4: Creating command queue associate with the context.*/
cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
// ============================== Kernel Compilation, Setup ====================================================
/*Step 5: Create program object */ /*Step 5: Create program object */
const char *filename = "HelloWorld_Kernel.cl"; const char *filename = "HelloWorld_Kernel.cl";
string sourceStr; std::string sourceStr;
status = convertToString(filename, sourceStr); status = convertToString(filename, sourceStr);
const char *source = sourceStr.c_str(); const char *source = sourceStr.c_str();
size_t sourceSize[] = {strlen(source)}; size_t sourceSize[] = {strlen(source)};
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
/*Step 6: Build program. */ // Build program and set kernel
status=clBuildProgram(program, 1,devices,NULL,NULL,NULL); status=clBuildProgram(program, 1,devices,NULL,NULL,NULL);
/*Step 7: Initial input,output for the host and create memory objects for the kernel*/ if (status == CL_BUILD_PROGRAM_FAILURE) {
const char* input = "GdkknVnqkc"; // Determine the size of the log
size_t strlength = strlen(input); size_t log_size;
cout << "input string:" << endl; clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
cout << input << endl;
char *output = (char*) malloc(strlength + 1);
cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, (strlength + 1) * sizeof(char),(void *) input, NULL); // Allocate memory for the log
cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , (strlength + 1) * sizeof(char), NULL, NULL); char *log = (char *)malloc(log_size);
/*Step 8: Create kernel object */ // Get the log
cl_kernel kernel = clCreateKernel(program,"helloworld", NULL); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
/*Step 9: Sets Kernel arguments.*/ // Print the log
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); printf("%s\n", log);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer); }
cl_kernel kernel = clCreateKernel(program, "helloworld", NULL);
// ======================================= START SFML ========================================================== // ======================================= Setup grid =========================================================
// Setup the rng
std::mt19937 rng(time(NULL));
std::uniform_int_distribution<int> rgen(0, 4); // 25% chance
// Init the grid
char* grid = new char[GRID_WIDTH * GRID_HEIGHT* 2];
for (int i = 0; i < 1000 * 1000 * 2; i += 2) {
if (rgen(rng) == 1) {
grid[i] = 1;
grid[i + 1] = 1;
}
else {
grid[i] = 0;
grid[i + 1] = 0;
}
}
// ====================================== Setup SFML ==========================================================
// Spites for drawing, probably where the biggest slowdown is // Spites for drawing, probably where the biggest slowdown is
sf::RectangleShape live_node; sf::RectangleShape live_node;
live_node.setFillColor(sf::Color(145, 181, 207)); live_node.setFillColor(sf::Color(145, 181, 207));
live_node.setSize(sf::Vector2f(WINDOW_X / Node::x_bound, WINDOW_Y / Node::y_bound)); live_node.setSize(sf::Vector2f(1, 1));
// Init window, and loop data // Init window, and loop data
sf::RenderWindow window(sf::VideoMode(WINDOW_X, WINDOW_Y), "Classic Games"); sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games");
float step_size = 0.0005f; float step_size = 0.0005f;
double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0; double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0;
int frame_count = 0; int frame_count = 0;
std::stack<std::thread> thread_stack;
// ===================================== Loop ==================================================================
while (window.isOpen()) { while (window.isOpen()) {
sf::Event event; sf::Event event;
@ -165,7 +188,7 @@ int main(int argc, char* argv[])
} }
// Time keeping // Time keeping
elapsed_time = elap_time(); //elapsed_time = elap_time();
delta_time = elapsed_time - current_time; delta_time = elapsed_time - current_time;
current_time = elapsed_time; current_time = elapsed_time;
if (delta_time > 0.02f) if (delta_time > 0.02f)
@ -174,77 +197,73 @@ int main(int argc, char* argv[])
while ((accumulator_time - step_size) >= step_size) { while ((accumulator_time - step_size) >= step_size) {
accumulator_time -= step_size; accumulator_time -= step_size;
// Do nothing, FPS tied update() // Do nothing, FPS tied update()
} }
// Implicit dead node color // ======================================= OpenCL Shtuff =============================================
window.clear(sf::Color(49, 68, 72));
for (int i = 0; i < 12; i++) { int err = 0;
thread_stack.emplace(updateRange, &node_vec, (node_vec.size() / 12)* i, (node_vec.size() / 12)* (i + 1)); cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, &err);
} cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err);
while (!thread_stack.empty()) { cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err);
thread_stack.top().join(); cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err);
thread_stack.pop();
} status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer);
// One work item per group, don't really know if this impacts performance
size_t global_work_size[1] = { 1 };
// Run the kernel
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
// Get output, put back into grid
cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), NULL, NULL);
status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), grid, 0, NULL, NULL);
//for (int i = 0; i < node_vec.size(); i++) { // Temporary
// node_vec.at(i).Update(&node_vec); status = clReleaseMemObject(inputBuffer);
//} status = clReleaseMemObject(workerCountBuffer);
status = clReleaseMemObject(gridWidthBuffer);
status = clReleaseMemObject(gridHeightBuffer);
for (int i = 0; i < node_vec.size(); i++) { // Swap status's
node_vec[i].ShiftState(); for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
grid[i] = grid[i + 1];
} }
for (int i = 0; i < node_vec.size(); i++) { for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
if (node_vec.at(i).CurrentState() == true) { if (!grid[i]) {
live_node.setPosition((i % Node::x_bound) * live_node.getGlobalBounds().width, (i / Node::x_bound) * live_node.getGlobalBounds().height); live_node.setPosition(sf::Vector2f((i % GRID_WIDTH) * (i / GRID_WIDTH), i / GRID_WIDTH));
window.draw(live_node); window.draw(live_node);
} }
else {
//dead_node.setPosition(i % Node::x_bound * dead_node.getGlobalBounds().width, i / Node::x_bound * dead_node.getGlobalBounds().height);
//window.draw(live_node);
}
} }
// Implicit dead node color
window.clear(sf::Color(49, 68, 72));
frame_count++; frame_count++;
window.display(); window.display();
}
// ======================================= END SFML ==========================================================
/*Step 10: Running the kernel.*/
size_t global_work_size[1] = {strlength};
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
/*Step 11: Read the cout put back to host memory.*/
status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, strlength * sizeof(char), output, 0, NULL, NULL);
output[strlength] = '\0'; //Add the terminal character to the end of output.
cout << "\noutput string:" << endl;
cout << output << endl;
/*Step 12: Clean the resources.*/ /*Step 12: Clean the resources.*/
status = clReleaseKernel(kernel); //Release kernel. status = clReleaseKernel(kernel); //Release kernel.
status = clReleaseProgram(program); //Release the program object. status = clReleaseProgram(program); //Release the program object.
status = clReleaseMemObject(inputBuffer); //Release mem object.
status = clReleaseMemObject(outputBuffer);
status = clReleaseCommandQueue(commandQueue); //Release Command queue. status = clReleaseCommandQueue(commandQueue); //Release Command queue.
status = clReleaseContext(context); //Release context. status = clReleaseContext(context); //Release context.
if (output != NULL)
{
free(output);
output = NULL;
}
if (devices != NULL) if (devices != NULL)
{ {
free(devices); free(devices);
devices = NULL; devices = NULL;
} }
std::cout<<"Passed!\n";
return SUCCESS; return SUCCESS;
} }

@ -1,11 +1,11 @@
__kernel void helloworld(__global char* in, __global int num_workers, __global int grid_width, __global int grid_height) __kernel void helloworld(__global char* in, __global int* num_workers, __global int* grid_width, __global int* grid_height)
{ {
// Caclulate the start and end range that this worker will be calculating // Caclulate the start and end range that this worker will be calculating
int data_length = grid_width * grid_height; int data_length = *grid_width * *grid_height;
int start_range = (data_length / num_workers) * get_global_id(0) * 2; // * 2 = padding int start_range = (data_length / *num_workers) * get_global_id(0) * 2; // * 2 = padding
int end_range = (data_length / num_workers) * (get_global_id(0) + 1) * 2; int end_range = (data_length / *num_workers) * (get_global_id(0) + 1) * 2;
// x, y + 1 // x, y + 1
@ -14,30 +14,30 @@ __kernel void helloworld(__global char* in, __global int num_workers, __global i
for (int i = start_range; i < end_range; i += 2){ for (int i = start_range; i < end_range; i += 2){
// add all 8 blocks to neghbors // add all 8 blocks to neghbors
neighbors = 0;
// Top // Top
neighbors += in[i - grid_width * 2]; neighbors += in[i - *grid_width * 2];
// Top right // Top right
neightbors += in[i - grid_width * 2 + 2]; neighbors += in[i - *grid_width * 2 + 2];
// Right // Right
neighbors += in[i + 2]; neighbors += in[i + 2];
// Bottom Right // Bottom Right
neighbors += in[i + grid_width * 2 + 2]; neighbors += in[i + *grid_width * 2 + 2];
// Bottom // Bottom
neighbors += in[i + grid_width * 2]; neighbors += in[i + *grid_width * 2];
// Bottom Left // Bottom Left
neighbors += in[i + grid_width * 2 - 2]; neighbors += in[i + *grid_width * 2 - 2];
// Left // Left
neighbors += in[i - 2]; neighbors += in[i - 2];
// Top left // Top left
neighbors += in[i - grid_width * 2 - 2]; neighbors += in[i - *grid_width * 2 - 2];
// push living status to the padded second char // push living status to the padded second char

Loading…
Cancel
Save