I think the GPU is computing things correctly now, but I'm having trouble getting the data back out of the GPU. More tomorrow.

master
MitchellHansen 10 years ago
parent 5c593695e8
commit 7d1cc67143

@ -8,11 +8,27 @@
#include <random> #include <random>
#include <ctime> #include <ctime>
#include <SFML/Graphics.hpp> #include <SFML/Graphics.hpp>
#include <windows.h>
#define SUCCESS 0 #define SUCCESS 0
#define FAILURE 1 #define FAILURE 1
using namespace std;
float elap_time() {
static __int64 start = 0;
static __int64 frequency = 0;
if (start == 0) {
QueryPerformanceCounter((LARGE_INTEGER*)&start);
QueryPerformanceFrequency((LARGE_INTEGER*)&frequency);
return 0.0f;
}
__int64 counter = 0;
QueryPerformanceCounter((LARGE_INTEGER*)&counter);
return (float)((counter - start) / double(frequency));
}
/* convert the kernel file into a string */ /* convert the kernel file into a string */
int convertToString(const char *filename, std::string& s) int convertToString(const char *filename, std::string& s)
@ -41,43 +57,30 @@ int convertToString(const char *filename, std::string& s)
delete[] str; delete[] str;
return 0; return 0;
} }
cout<<"Error: failed to open file\n:"<<filename<<endl; std::cout << "Error: failed to open file\n:" << filename << std::endl;
return FAILURE; return FAILURE;
} }
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
int WINDOW_X = 1000;
int WINDOW_Y = 1000;
int GRID_WIDTH = 1000;
int GRID_HEIGHT = 1000;
int WORKER_SIZE = 1000;
// ============================== OpenCL Setup ==================================================================
// 1000 x 1000 grid
std::mt19937 rng(time(NULL));
std::uniform_int_distribution<int> rgen(0, 4); // 25% chance
char* grid = new char[1000 * 1000 * 2];
for (int i = 0; i < 1000 * 1000 * 2; i += 2) {
if (rgen(rng) == 1) {
grid[i] = 1;
grid[i + 1] = 1;
}
else {
grid[i] = 0;
grid[i + 1] = 0;
}
}
/*Step1: Getting platforms and choose an available one.*/ /*Step1: Getting platforms and choose an available one.*/
cl_uint numPlatforms; //the NO. of platforms cl_uint numPlatforms; //the NO. of platforms
cl_platform_id platform = NULL; //the chosen platform cl_platform_id platform = NULL; //the chosen platform
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) if (status != CL_SUCCESS) {
{ std::cout << "Error: Getting platforms!" << std::endl;
cout << "Error: Getting platforms!" << endl;
return FAILURE; return FAILURE;
} }
/*For clarity, choose the first available platform. */ // Choose the first available platform
if(numPlatforms > 0) if(numPlatforms > 0)
{ {
cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id)); cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id));
@ -90,16 +93,14 @@ int main(int argc, char* argv[])
cl_uint numDevices = 0; cl_uint numDevices = 0;
cl_device_id *devices; cl_device_id *devices;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (numDevices == 0) //no GPU available. if (numDevices == 0) { //no GPU available.
{ std::cout << "No GPU device available." << std::endl;
cout << "No GPU device available." << endl; std::cout << "Choose CPU as default device." << std::endl;
cout << "Choose CPU as default device." << endl;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
} }
else else {
{
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
} }
@ -111,51 +112,73 @@ int main(int argc, char* argv[])
/*Step 4: Creating command queue associate with the context.*/ /*Step 4: Creating command queue associate with the context.*/
cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
// ============================== Kernel Compilation, Setup ====================================================
/*Step 5: Create program object */ /*Step 5: Create program object */
const char *filename = "HelloWorld_Kernel.cl"; const char *filename = "HelloWorld_Kernel.cl";
string sourceStr; std::string sourceStr;
status = convertToString(filename, sourceStr); status = convertToString(filename, sourceStr);
const char *source = sourceStr.c_str(); const char *source = sourceStr.c_str();
size_t sourceSize[] = {strlen(source)}; size_t sourceSize[] = {strlen(source)};
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
/*Step 6: Build program. */ // Build program and set kernel
status=clBuildProgram(program, 1,devices,NULL,NULL,NULL); status=clBuildProgram(program, 1,devices,NULL,NULL,NULL);
/*Step 7: Initial input,output for the host and create memory objects for the kernel*/ if (status == CL_BUILD_PROGRAM_FAILURE) {
const char* input = "GdkknVnqkc"; // Determine the size of the log
size_t strlength = strlen(input); size_t log_size;
cout << "input string:" << endl; clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
cout << input << endl;
char *output = (char*) malloc(strlength + 1);
cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, (strlength + 1) * sizeof(char),(void *) input, NULL); // Allocate memory for the log
cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , (strlength + 1) * sizeof(char), NULL, NULL); char *log = (char *)malloc(log_size);
/*Step 8: Create kernel object */ // Get the log
cl_kernel kernel = clCreateKernel(program,"helloworld", NULL); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
/*Step 9: Sets Kernel arguments.*/ // Print the log
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); printf("%s\n", log);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer); }
// ======================================= START SFML ==========================================================
cl_kernel kernel = clCreateKernel(program, "helloworld", NULL);
// ======================================= Setup grid =========================================================
// Setup the rng
std::mt19937 rng(time(NULL));
std::uniform_int_distribution<int> rgen(0, 4); // 25% chance
// Init the grid
char* grid = new char[GRID_WIDTH * GRID_HEIGHT* 2];
for (int i = 0; i < 1000 * 1000 * 2; i += 2) {
if (rgen(rng) == 1) {
grid[i] = 1;
grid[i + 1] = 1;
}
else {
grid[i] = 0;
grid[i + 1] = 0;
}
}
// ====================================== Setup SFML ==========================================================
// Spites for drawing, probably where the biggest slowdown is // Spites for drawing, probably where the biggest slowdown is
sf::RectangleShape live_node; sf::RectangleShape live_node;
live_node.setFillColor(sf::Color(145, 181, 207)); live_node.setFillColor(sf::Color(145, 181, 207));
live_node.setSize(sf::Vector2f(WINDOW_X / Node::x_bound, WINDOW_Y / Node::y_bound)); live_node.setSize(sf::Vector2f(1, 1));
// Init window, and loop data // Init window, and loop data
sf::RenderWindow window(sf::VideoMode(WINDOW_X, WINDOW_Y), "Classic Games"); sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games");
float step_size = 0.0005f; float step_size = 0.0005f;
double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0; double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0;
int frame_count = 0; int frame_count = 0;
std::stack<std::thread> thread_stack;
// ===================================== Loop ==================================================================
while (window.isOpen()) { while (window.isOpen()) {
sf::Event event; sf::Event event;
@ -165,7 +188,7 @@ int main(int argc, char* argv[])
} }
// Time keeping // Time keeping
elapsed_time = elap_time(); //elapsed_time = elap_time();
delta_time = elapsed_time - current_time; delta_time = elapsed_time - current_time;
current_time = elapsed_time; current_time = elapsed_time;
if (delta_time > 0.02f) if (delta_time > 0.02f)
@ -174,77 +197,73 @@ int main(int argc, char* argv[])
while ((accumulator_time - step_size) >= step_size) { while ((accumulator_time - step_size) >= step_size) {
accumulator_time -= step_size; accumulator_time -= step_size;
// Do nothing, FPS tied update() // Do nothing, FPS tied update()
} }
// Implicit dead node color // ======================================= OpenCL Shtuff =============================================
window.clear(sf::Color(49, 68, 72));
for (int i = 0; i < 12; i++) { int err = 0;
thread_stack.emplace(updateRange, &node_vec, (node_vec.size() / 12)* i, (node_vec.size() / 12)* (i + 1)); cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, &err);
} cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err);
while (!thread_stack.empty()) { cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err);
thread_stack.top().join(); cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err);
thread_stack.pop();
} status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer);
// One work item per group, don't really know if this impacts performance
size_t global_work_size[1] = { 1 };
//for (int i = 0; i < node_vec.size(); i++) { // Run the kernel
// node_vec.at(i).Update(&node_vec); status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
//}
for (int i = 0; i < node_vec.size(); i++) { // Get output, put back into grid
node_vec[i].ShiftState(); cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), NULL, NULL);
status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), grid, 0, NULL, NULL);
// Temporary
status = clReleaseMemObject(inputBuffer);
status = clReleaseMemObject(workerCountBuffer);
status = clReleaseMemObject(gridWidthBuffer);
status = clReleaseMemObject(gridHeightBuffer);
// Swap status's
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
grid[i] = grid[i + 1];
} }
for (int i = 0; i < node_vec.size(); i++) { for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
if (node_vec.at(i).CurrentState() == true) { if (!grid[i]) {
live_node.setPosition((i % Node::x_bound) * live_node.getGlobalBounds().width, (i / Node::x_bound) * live_node.getGlobalBounds().height); live_node.setPosition(sf::Vector2f((i % GRID_WIDTH) * (i / GRID_WIDTH), i / GRID_WIDTH));
window.draw(live_node); window.draw(live_node);
} }
else {
//dead_node.setPosition(i % Node::x_bound * dead_node.getGlobalBounds().width, i / Node::x_bound * dead_node.getGlobalBounds().height);
//window.draw(live_node);
}
} }
// Implicit dead node color
window.clear(sf::Color(49, 68, 72));
frame_count++; frame_count++;
window.display(); window.display();
}
// ======================================= END SFML ==========================================================
/*Step 10: Running the kernel.*/
size_t global_work_size[1] = {strlength};
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
/*Step 11: Read the cout put back to host memory.*/
status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, strlength * sizeof(char), output, 0, NULL, NULL);
output[strlength] = '\0'; //Add the terminal character to the end of output.
cout << "\noutput string:" << endl;
cout << output << endl;
/*Step 12: Clean the resources.*/ /*Step 12: Clean the resources.*/
status = clReleaseKernel(kernel); //Release kernel. status = clReleaseKernel(kernel); //Release kernel.
status = clReleaseProgram(program); //Release the program object. status = clReleaseProgram(program); //Release the program object.
status = clReleaseMemObject(inputBuffer); //Release mem object.
status = clReleaseMemObject(outputBuffer);
status = clReleaseCommandQueue(commandQueue); //Release Command queue. status = clReleaseCommandQueue(commandQueue); //Release Command queue.
status = clReleaseContext(context); //Release context. status = clReleaseContext(context); //Release context.
if (output != NULL)
{
free(output);
output = NULL;
}
if (devices != NULL) if (devices != NULL)
{ {
free(devices); free(devices);
devices = NULL; devices = NULL;
} }
std::cout<<"Passed!\n";
return SUCCESS; return SUCCESS;
} }

@ -1,11 +1,11 @@
__kernel void helloworld(__global char* in, __global int num_workers, __global int grid_width, __global int grid_height) __kernel void helloworld(__global char* in, __global int* num_workers, __global int* grid_width, __global int* grid_height)
{ {
// Caclulate the start and end range that this worker will be calculating // Caclulate the start and end range that this worker will be calculating
int data_length = grid_width * grid_height; int data_length = *grid_width * *grid_height;
int start_range = (data_length / num_workers) * get_global_id(0) * 2; // * 2 = padding int start_range = (data_length / *num_workers) * get_global_id(0) * 2; // * 2 = padding
int end_range = (data_length / num_workers) * (get_global_id(0) + 1) * 2; int end_range = (data_length / *num_workers) * (get_global_id(0) + 1) * 2;
// x, y + 1 // x, y + 1
@ -14,30 +14,30 @@ __kernel void helloworld(__global char* in, __global int num_workers, __global i
for (int i = start_range; i < end_range; i += 2){ for (int i = start_range; i < end_range; i += 2){
// add all 8 blocks to neghbors // add all 8 blocks to neghbors
neighbors = 0;
// Top // Top
neighbors += in[i - grid_width * 2]; neighbors += in[i - *grid_width * 2];
// Top right // Top right
neightbors += in[i - grid_width * 2 + 2]; neighbors += in[i - *grid_width * 2 + 2];
// Right // Right
neighbors += in[i + 2]; neighbors += in[i + 2];
// Bottom Right // Bottom Right
neighbors += in[i + grid_width * 2 + 2]; neighbors += in[i + *grid_width * 2 + 2];
// Bottom // Bottom
neighbors += in[i + grid_width * 2]; neighbors += in[i + *grid_width * 2];
// Bottom Left // Bottom Left
neighbors += in[i + grid_width * 2 - 2]; neighbors += in[i + *grid_width * 2 - 2];
// Left // Left
neighbors += in[i - 2]; neighbors += in[i - 2];
// Top left // Top left
neighbors += in[i - grid_width * 2 - 2]; neighbors += in[i - *grid_width * 2 - 2];
// push living status to the padded second char // push living status to the padded second char

Loading…
Cancel
Save