From 36f72f5cbe7bfd77d85e2c6bb6d0f1385faa8979 Mon Sep 17 00:00:00 2001 From: MitchellHansen Date: Fri, 31 Mar 2017 21:00:06 -0700 Subject: [PATCH] Converting over to run using OpenCL --- CMakeLists.txt | 66 ++++- include/OpenCL.h | 112 +++++++ include/util.hpp | 83 ------ kernels/mandlebrot.cl | 39 +++ src/OpenCL.cpp | 669 ++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 17 +- 6 files changed, 897 insertions(+), 89 deletions(-) create mode 100644 include/OpenCL.h create mode 100644 kernels/mandlebrot.cl create mode 100644 src/OpenCL.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 76acfe7..a90b03c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,8 @@ message(STATUS "CMake version: ${CMAKE_VERSION}") cmake_minimum_required(VERSION 3.1) +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + # Set the project name set(PNAME Mandlebrot) project(${PNAME}) @@ -17,18 +19,78 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) find_package(SFML 2.1 COMPONENTS ${SFML_COMPONENTS} REQUIRED) message(STATUS "SFML found: ${SFML_FOUND}") -# Include the directories for SFML's headers +# Find OpenCL +find_package( OpenCL REQUIRED ) +message(STATUS "OpenCL found: ${OPENCL_FOUND}") + +# Find OpenGL +find_package( OpenGL REQUIRED) +message(STATUS "OpenGL found: ${OPENGL_FOUND}") + +# Include the directories for the main program, GL, CL and SFML's headers include_directories(${SFML_INCLUDE_DIR}) +include_directories(${OpenCL_INCLUDE_DIRS}) include_directories(include) # Glob all thr sources into their values file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE HEADERS "include/*.h" "include/*.hpp") +file(GLOB_RECURSE KERNELS "kernels/*.cl") + +add_executable(${PNAME} ${SOURCES} ${HEADERS} ${KERNELS}) + +# Follow the sub directory structure to add sub-filters in VS +# Gotta do it one by one unfortunately + +foreach (source IN ITEMS ${SOURCES}) + if (IS_ABSOLUTE "${source}") + + get_filename_component(filename ${source} DIRECTORY) + + STRING(REGEX REPLACE "/" "\\\\" filename ${filename}) + + string(REGEX MATCHALL "src(.*)" substrings ${filename}) + list(GET substrings 0 substring) + + SOURCE_GROUP(${substring} FILES ${source}) + + endif() +endforeach() + +foreach (source IN ITEMS ${HEADERS}) + if (IS_ABSOLUTE "${source}") + + get_filename_component(filename ${source} DIRECTORY) + + STRING(REGEX REPLACE "/" "\\\\" filename ${filename}) + + string(REGEX MATCHALL "include(.*)" substrings ${filename}) + list(GET substrings 0 substring) + + SOURCE_GROUP(${substring} FILES ${source}) + + endif() +endforeach() + +foreach (source IN ITEMS ${KERNELS}) + if (IS_ABSOLUTE "${source}") + + get_filename_component(filename ${source} DIRECTORY) -add_executable(${PNAME} ${SOURCES} ${HEADERS}) + STRING(REGEX REPLACE "/" "\\\\" filename ${filename}) + + string(REGEX MATCHALL "kernels(.*)" substrings ${filename}) + list(GET substrings 0 substring) + + SOURCE_GROUP(${substring} FILES ${source}) + + endif() +endforeach() # Link CL, GL, and SFML target_link_libraries (${PNAME} ${SFML_LIBRARIES} ${SFML_DEPENDENCIES}) +target_link_libraries (${PNAME} ${OpenCL_LIBRARY}) +target_link_libraries (${PNAME} ${OPENGL_LIBRARIES}) if (NOT WIN32) target_link_libraries (${PNAME} -lpthread) diff --git a/include/OpenCL.h b/include/OpenCL.h new file mode 100644 index 0000000..687ba3a --- /dev/null +++ b/include/OpenCL.h @@ -0,0 +1,112 @@ +#pragma once + +#ifdef linux +#include +#include + +#elif defined _WIN32 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#include +#include +#include + +// Note: windows.h must be included before Gl/GL.h +#include +#include + +#elif defined TARGET_OS_MAC +#include + +#endif + +#include +#include +#include +#include + +struct device { + + cl_device_id id; + cl_device_type type; + cl_uint clock_frequency; + char version[128]; + cl_platform_id platform; + cl_uint comp_units; + char extensions[1024]; + char name[256]; + cl_bool is_little_endian = false; + bool cl_gl_sharing = false; + +}; + + +class OpenCL { + +public: + + OpenCL(sf::Vector2i resolution); + ~OpenCL(); + + bool init(); + + void run_kernel(std::string kernel_name); + + void draw(sf::RenderWindow *window); + +private: + + int error = 0; + + // Sprite and texture that is shared between CL and GL + sf::Sprite viewport_sprite; + sf::Texture viewport_texture; + sf::Vector2i viewport_resolution; + + // The device which we have selected according to certain criteria + cl_platform_id platform_id; + cl_device_id device_id; + + // The GL shared context and its subsiquently generated command queue + cl_context context; + cl_command_queue command_queue; + + // Maps which contain a mapping from "name" to the host side CL memory object + std::unordered_map kernel_map; + std::unordered_map buffer_map; + + // Query the hardware on this machine and select the best device and the platform on which it resides + void aquire_hardware(); + + // After aquiring hardware, create a shared context using platform specific CL commands + void create_shared_context(); + + // Command queues must be created with a valid context + void create_command_queue(); + + // Compile the kernel and store it in the kernel map with the name as the key + bool compile_kernel(std::string kernel_path, std::string kernel_name); + + // Buffer operations + // All of these functions create and store a buffer in a map with the key representing their name + + // Create an image buffer from an SF texture. Access Type is the read/write specifier required by OpenCL + int create_image_buffer(std::string buffer_name, cl_uint size, sf::Texture* texture, cl_int access_type); + + // Create a buffer with CL_MEM_READ_ONLY and CL_MEM_COPY_HOST_PTR + int create_buffer(std::string buffer_name, cl_uint size, void* data); + + // Create a buffer with user defined data access flags + int create_buffer(std::string buffer_name, cl_uint size, void* data, cl_mem_flags flags); + + // Store a cl_mem object in the buffer map + int store_buffer(cl_mem buffer, std::string buffer_name); + + // Using CL release the memory object and remove the KVP associated with the buffer name + int release_buffer(std::string buffer_name); + + void assign_kernel_args(); + int set_kernel_arg(std::string kernel_name, int index, std::string buffer_name); + + bool vr_assert(int error_code, std::string function_name); + +}; \ No newline at end of file diff --git a/include/util.hpp b/include/util.hpp index ecce740..1aac36e 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -97,32 +97,6 @@ private: int vertex_position = 0; }; -struct debug_text { -public: - debug_text(int slot, int pixel_spacing, void* data_, std::string prefix_) : data(data_), prefix(prefix_) { - if (!f.loadFromFile("../assets/fonts/Arial.ttf")) { - std::cout << "couldn't find the fall back Arial font in ../assets/fonts/" << std::endl; - } - else { - t.setFont(f); - t.setCharacterSize(20); - t.setPosition(static_cast(20), static_cast(slot * pixel_spacing)); - } - - } - - void draw(sf::RenderWindow *r) { - t.setString(prefix + std::to_string(*(float*)data)); - r->draw(t); - } - -private: - void* data; - std::string prefix; - sf::Font f; - sf::Text t; - -}; inline sf::Vector3f SphereToCart(sf::Vector2f i) { @@ -169,7 +143,6 @@ inline sf::Vector3f FixOrigin(sf::Vector3f base, sf::Vector3f head) { return head - base; } - inline sf::Vector3f Normalize(sf::Vector3f in) { float multiplier = sqrt(in.x * in.x + in.y * in.y + in.z * in.z); @@ -182,7 +155,6 @@ inline sf::Vector3f Normalize(sf::Vector3f in) { } - inline float DotProduct(sf::Vector3f a, sf::Vector3f b){ return a.x * b.x + a.y * b.y + a.z * b.z; } @@ -252,61 +224,6 @@ inline void DumpLog(std::stringstream* ss, std::string file_name) { } -inline std::string sfml_get_input(sf::RenderWindow *window) { - - std::stringstream ss; - - sf::Event event; - while (window->pollEvent(event)) { - if (event.type == sf::Event::TextEntered) { - ss << event.text.unicode; - } - - else if (event.type == sf::Event::KeyPressed) { - if (event.key.code == sf::Keyboard::Return) { - return ss.str(); - } - } - } -} - -inline std::vector sfml_get_float_input(sf::RenderWindow *window) { - - std::stringstream ss; - - sf::Event event; - while (true) { - - if (window->pollEvent(event)) { - - if (event.type == sf::Event::TextEntered) { - if (event.text.unicode > 47 && event.text.unicode < 58 || event.text.unicode == 32) - ss << static_cast(event.text.unicode); - } - - else if (event.type == sf::Event::KeyPressed) { - - if (event.key.code == sf::Keyboard::Return) { - break; - } - } - } - } - - std::istream_iterator begin(ss); - std::istream_iterator end; - std::vector vstrings(begin, end); - - std::vector ret; - - for (auto i: vstrings) { - ret.push_back(std::stof(i)); - } - - return ret; - -} - inline int count_bits(int32_t v) { v = v - ((v >> 1) & 0x55555555); // reuse input as temporary diff --git a/kernels/mandlebrot.cl b/kernels/mandlebrot.cl new file mode 100644 index 0000000..1441f60 --- /dev/null +++ b/kernels/mandlebrot.cl @@ -0,0 +1,39 @@ + +float scale(float valueIn, float origMin, float origMax, float scaledMin, float scaledMax) { + return ((scaledMax - scaledMin) * (valueIn - origMin) / (origMax - origMin)) + scaledMin; +} + +__kernel void mandlebrot ( + global int2* image_res, + __write_only image2d_t image, + global float4* range + ){ + + size_t x_pixel = get_global_id(0); + size_t y_pixel = get_global_id(1); + + int2 pixel = (int2)(x_pixel, y_pixel); + + float x0 = scale(x_pixel, 0, (*image_res).x, (*range).x, (*range).y); + float y0 = scale(y_pixel, 0, (*image_res).y, (*range).z, (*range).w); + + float x = 0.0; + float y = 0.0; + + int iteration_count = 0; + int interation_threshold = 1000; + + while (x*x + y*y < 4 && iteration_count < interation_threshold) { + float x_temp = x*x - y*y + x0; + y = 2 * x * y + y0; + x = x_temp; + iteration_count++; + } + + int val = scale(iteration_count, 0, 1000, 0, 16777216); + + write_imagef(image, pixel, (float4)(val & 0xff, (val >> 8) & 0xff, (val >> 16) & 0xff, 200)); + + return; + +} diff --git a/src/OpenCL.cpp b/src/OpenCL.cpp new file mode 100644 index 0000000..e0f8b0d --- /dev/null +++ b/src/OpenCL.cpp @@ -0,0 +1,669 @@ +#include +#include "util.hpp" + + +void OpenCL::run_kernel(std::string kernel_name) { + + size_t global_work_size[2] = { static_cast(viewport_resolution.x), static_cast(viewport_resolution.y) }; + + cl_kernel kernel = kernel_map.at(kernel_name); + + error = clEnqueueAcquireGLObjects(command_queue, 1, &buffer_map.at("viewport_image"), 0, 0, 0); + if (vr_assert(error, "clEnqueueAcquireGLObjects")) + return; + + //error = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); + error = clEnqueueNDRangeKernel( + command_queue, kernel, + 2, NULL, global_work_size, + NULL, 0, NULL, NULL); + + if (vr_assert(error, "clEnqueueNDRangeKernel")) + return; + + clFinish(command_queue); + + // What if errors out and gl objects are never released? + error = clEnqueueReleaseGLObjects(command_queue, 1, &buffer_map.at("viewport_image"), 0, NULL, NULL); + if (vr_assert(error, "clEnqueueReleaseGLObjects")) + return; + +} + +void OpenCL::draw(sf::RenderWindow *window) { + + window->draw(viewport_sprite); +} + +void OpenCL::aquire_hardware() { + // Get the number of platforms + cl_uint plt_cnt = 0; + clGetPlatformIDs(0, nullptr, &plt_cnt); + + // Fetch the platforms + std::map> plt_ids; + + // buffer before map init + std::vector plt_buf(plt_cnt); + clGetPlatformIDs(plt_cnt, plt_buf.data(), nullptr); + + // Map init + for (auto id : plt_buf) { + plt_ids.emplace(std::make_pair(id, std::vector())); + } + + // For each platform, populate its devices + for (unsigned int i = 0; i < plt_cnt; i++) { + + cl_uint deviceIdCount = 0; + error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount); + + // Check to see if we even have OpenCL on this machine + if (deviceIdCount == 0) { + std::cout << "There appears to be no devices, or none at least supporting OpenCL" << std::endl; + return; + } + + // Get the device ids + std::vector deviceIds(deviceIdCount); + error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data(), NULL); + + if (vr_assert(error, "clGetDeviceIDs")) + return; + + for (unsigned int q = 0; q < deviceIdCount; q++) { + + device d; + + d.id = deviceIds[q]; + + clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &d.platform, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_VERSION, sizeof(char) * 128, &d.version, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_TYPE, sizeof(cl_device_type), &d.type, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &d.clock_frequency, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &d.comp_units, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, 1024, &d.extensions, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_NAME, 256, &d.name, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), &d.is_little_endian, NULL); + + std::cout << "Device: " << q << std::endl; + std::cout << "Device Name : " << d.name << std::endl; + + std::cout << "Platform ID : " << d.platform << std::endl; + std::cout << "Device Version : " << d.version << std::endl; + + std::cout << "Device Type : "; + if (d.type == CL_DEVICE_TYPE_CPU) + std::cout << "CPU" << std::endl; + + else if (d.type == CL_DEVICE_TYPE_GPU) + std::cout << "GPU" << std::endl; + + else if (d.type == CL_DEVICE_TYPE_ACCELERATOR) + std::cout << "Accelerator" << std::endl; + + std::cout << "Max clock frequency : " << d.clock_frequency << std::endl; + std::cout << "Max compute units : " << d.comp_units << std::endl; + std::cout << "Is little endian : " << std::boolalpha << static_cast(d.is_little_endian) << std::endl; + + std::cout << "cl_khr_gl_sharing supported: "; + if (std::string(d.extensions).find("cl_khr_gl_sharing") == std::string::npos && + std::string(d.extensions).find("cl_APPLE_gl_sharing") == std::string::npos) { + std::cout << "False" << std::endl; + } + std::cout << "True" << std::endl; + d.cl_gl_sharing = true; + + std::cout << "Extensions supported: " << std::endl; + std::cout << std::string(d.extensions) << std::endl; + + std::cout << " ===================================================================================== " << std::endl; + + plt_ids.at(d.platform).push_back(d); + } + } + + + // The devices how now been queried we want to shoot for a gpu with the fastest clock, + // falling back to the cpu with the fastest clock if we weren't able to find one + + device current_best_device; + current_best_device.type = 0; // Set this to 0 so the first run always selects a new device + current_best_device.clock_frequency = 0; + current_best_device.comp_units = 0; + + + for (auto kvp : plt_ids) { + + for (auto device : kvp.second) { + + // Gonna just split this up into cases. There are so many devices I cant test with + // that opencl supports. I'm not going to waste my time making a generic implimentation + + // Upon success of a condition, set the current best device values + + //if (strcmp(device.version, "OpenCL 1.2 ") == 0 && strcmp(device.version, current_best_device.version) != 0) { + // current_best_device = device; + //} + + // If the current device is not a GPU and we are comparing it to a GPU + if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU) { + current_best_device = device; + } + + //if (device.type == CL_DEVICE_TYPE_CPU && + // current_best_device.type != CL_DEVICE_TYPE_CPU) { + // current_best_device = device; + //} + + // Get the unit with the higher compute units + if (device.comp_units > current_best_device.comp_units) { + current_best_device = device; + } + + // If we are comparing CPU to CPU get the one with the best clock + if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency) { + current_best_device = device; + } + + if (current_best_device.cl_gl_sharing == false && device.cl_gl_sharing == true) { + current_best_device = device; + } + + } + } + + platform_id = current_best_device.platform; + device_id = current_best_device.id; + + std::cout << std::endl; + std::cout << "Selected Platform : " << platform_id << std::endl; + std::cout << "Selected Device : " << device_id << std::endl; + std::cout << "Selected Name : " << current_best_device.name << std::endl; + std::cout << "Selected Version : " << current_best_device.version << std::endl; + + if (current_best_device.cl_gl_sharing == false) { + std::cout << "This device does not support the cl_khr_gl_sharing extension" << std::endl; + return; + } + +} + +void OpenCL::create_shared_context() { + + // Hurray for standards! + // Setup the context properties to grab the current GL context + +#ifdef linux + + cl_context_properties context_properties[] = { + CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), + CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), + CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, + 0 + }; + +#elif defined _WIN32 + + HGLRC hGLRC = wglGetCurrentContext(); + HDC hDC = wglGetCurrentDC(); + cl_context_properties context_properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, + CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, + CL_WGL_HDC_KHR, (cl_context_properties)hDC, + 0 + }; + + +#elif defined TARGET_OS_MAC + + CGLContextObj glContext = CGLGetCurrentContext(); + CGLShareGroupObj shareGroup = CGLGetShareGroup(glContext); + cl_context_properties context_properties[] = { + CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, + (cl_context_properties)shareGroup, + 0 + }; + +#endif + + // Create our shared context + context = clCreateContext( + context_properties, + 1, + &device_id, + nullptr, nullptr, + &error + ); + + if (vr_assert(error, "clCreateContext")) + return; + +} + +void OpenCL::create_command_queue() { + + // If context and device_id have initialized + if (context && device_id) { + + command_queue = clCreateCommandQueue(context, device_id, 0, &error); + + if (vr_assert(error, "clCreateCommandQueue")) + return; + + return; + } + else { + std::cout << "Failed creating the command queue. Context or device_id not initialized"; + return; + } +} + +bool OpenCL::compile_kernel(std::string kernel_path, std::string kernel_name) { + + const char* source; + std::string tmp; + + //Load in the kernel, and c stringify it + tmp = read_file(kernel_path); + source = tmp.c_str(); + + + size_t kernel_source_size = strlen(source); + + // Load the source into CL's data structure + + cl_program program = clCreateProgramWithSource( + context, 1, + &source, + &kernel_source_size, &error + ); + + // This is not for compilation, it only loads the source + if (vr_assert(error, "clCreateProgramWithSource")) + return false; + + + // Try and build the program + // "-cl-finite-math-only -cl-fast-relaxed-math -cl-unsafe-math-optimizations" + error = clBuildProgram(program, 1, &device_id, "-cl-finite-math-only -cl-fast-relaxed-math -cl-unsafe-math-optimizations", NULL, NULL); + + // Check to see if it errored out + if (vr_assert(error, "clBuildProgram")) { + + // Get the size of the queued log + size_t log_size; + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + char *log = new char[log_size]; + + // Grab the log + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + + std::cout << log; + return false; + } + + // Done initializing the kernel + cl_kernel kernel = clCreateKernel(program, kernel_name.c_str(), &error); + + if (vr_assert(error, "clCreateKernel")) + return false; + + // Do I want these to overlap when repeated?? + kernel_map[kernel_name] = kernel; + + return true; +} + +int OpenCL::create_image_buffer(std::string buffer_name, cl_uint size, sf::Texture* texture, cl_int access_type) { + + if (buffer_map.count(buffer_name) > 0) { + release_buffer(buffer_name); + } + + int error; + cl_mem buff = clCreateFromGLTexture( + context, access_type, GL_TEXTURE_2D, + 0, texture->getNativeHandle(), &error); + + if (vr_assert(error, "clCreateFromGLTexture")) + return 1; + + store_buffer(buff, buffer_name); + + return 1; +} + +int OpenCL::create_buffer(std::string buffer_name, cl_uint size, void* data) { + + if (buffer_map.count(buffer_name) > 0) { + release_buffer(buffer_name); + } + + cl_mem buff = clCreateBuffer( + context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + size, data, &error + ); + + if (vr_assert(error, "clCreateBuffer")) + return -1; + + store_buffer(buff, buffer_name); + + return 1; +} + +int OpenCL::create_buffer(std::string buffer_name, cl_uint size, void* data, cl_mem_flags flags) { + + if (buffer_map.count(buffer_name) > 0) { + release_buffer(buffer_name); + } + + cl_mem buff = clCreateBuffer( + context, flags, + size, data, &error + ); + + if (vr_assert(error, "clCreateBuffer")) + return -1; + + store_buffer(buff, buffer_name); + + return 1; + +} + +int OpenCL::store_buffer(cl_mem buffer, std::string buffer_name) { + + if (buffer_map.count(buffer_name)) { + clReleaseMemObject(buffer_map[buffer_name]); + } + + buffer_map[buffer_name] = buffer; + + return 1; +} + +int OpenCL::release_buffer(std::string buffer_name) { + + if (buffer_map.count(buffer_name) > 0) { + + int error = clReleaseMemObject(buffer_map.at(buffer_name)); + + if (vr_assert(error, "clReleaseMemObject")) { + std::cout << "Error releasing buffer : " << buffer_name; + std::cout << "Buffer not removed"; + return -1; + + } + else { + buffer_map.erase(buffer_name); + } + + } + else { + std::cout << "Error releasing buffer : " << buffer_name; + std::cout << "Buffer not found"; + return -1; + } + + return 1; +} + +void OpenCL::assign_kernel_args() { + +} + +int OpenCL::set_kernel_arg(std::string kernel_name, int index, std::string buffer_name) { + + error = clSetKernelArg( + kernel_map.at(kernel_name), + index, + sizeof(cl_mem), + (void *)&buffer_map.at(buffer_name)); + + if (vr_assert(error, "clSetKernelArg")) { + std::cout << buffer_name << std::endl; + std::cout << buffer_map.at(buffer_name) << std::endl; + return -1; + } + return 1; +} + +OpenCL::OpenCL(sf::Vector2i resolution) : viewport_resolution(resolution){ + + viewport_texture.create(viewport_resolution.x, viewport_resolution.y); + viewport_sprite.setTexture(viewport_texture); + + +} + +OpenCL::~OpenCL() { + +} + +bool OpenCL::init() { + + // Initialize opencl up to the point where we start assigning buffers + aquire_hardware(); + + create_shared_context(); + + create_command_queue(); + + while (!compile_kernel("../kernels/mandlebrot.cl", "mandlebrot")) { + std::cin.get(); + } + + create_image_buffer("viewport_image", viewport_texture.getSize().x * viewport_texture.getSize().x * 4 * sizeof(float), &viewport_texture, CL_MEM_WRITE_ONLY); + create_buffer("image_res", sizeof(sf::Vector2i), &viewport_resolution); + + sf::Vector4i range(-1.0f, 1.0f, -1.0f, 1.0f); + create_buffer("range", sizeof(sf::Vector4i), &range); + + set_kernel_arg("mandlebrot", 0, "image_res"); + set_kernel_arg("mandlebrot", 1, "viewport_image"); + set_kernel_arg("mandlebrot", 2, "range"); + + return true; +} + + +bool OpenCL::vr_assert(int error_code, std::string function_name) { + + // Just gonna do a little jump table here, just error codes so who cares + std::string err_msg = "Error : "; + + switch (error_code) { + + case CL_SUCCESS: + return false; + + case 1: + return false; + + case CL_DEVICE_NOT_FOUND: + err_msg += "CL_DEVICE_NOT_FOUND"; + break; + case CL_DEVICE_NOT_AVAILABLE: + err_msg = "CL_DEVICE_NOT_AVAILABLE"; + break; + case CL_COMPILER_NOT_AVAILABLE: + err_msg = "CL_COMPILER_NOT_AVAILABLE"; + break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + err_msg = "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + break; + case CL_OUT_OF_RESOURCES: + err_msg = "CL_OUT_OF_RESOURCES"; + break; + case CL_OUT_OF_HOST_MEMORY: + err_msg = "CL_OUT_OF_HOST_MEMORY"; + break; + case CL_PROFILING_INFO_NOT_AVAILABLE: + err_msg = "CL_PROFILING_INFO_NOT_AVAILABLE"; + break; + case CL_MEM_COPY_OVERLAP: + err_msg = "CL_MEM_COPY_OVERLAP"; + break; + case CL_IMAGE_FORMAT_MISMATCH: + err_msg = "CL_IMAGE_FORMAT_MISMATCH"; + break; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + err_msg = "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + break; + case CL_BUILD_PROGRAM_FAILURE: + err_msg = "CL_BUILD_PROGRAM_FAILURE"; + break; + case CL_MAP_FAILURE: + err_msg = "CL_MAP_FAILURE"; + break; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + err_msg = "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + break; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + err_msg = "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + break; + case CL_COMPILE_PROGRAM_FAILURE: + err_msg = "CL_COMPILE_PROGRAM_FAILURE"; + break; + case CL_LINKER_NOT_AVAILABLE: + err_msg = "CL_LINKER_NOT_AVAILABLE"; + break; + case CL_LINK_PROGRAM_FAILURE: + err_msg = "CL_LINK_PROGRAM_FAILURE"; + break; + case CL_DEVICE_PARTITION_FAILED: + err_msg = "CL_DEVICE_PARTITION_FAILED"; + break; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: + err_msg = "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + break; + case CL_INVALID_VALUE: + err_msg = "CL_INVALID_VALUE"; + break; + case CL_INVALID_DEVICE_TYPE: + err_msg = "CL_INVALID_DEVICE_TYPE"; + break; + case CL_INVALID_PLATFORM: + err_msg = "CL_INVALID_PLATFORM"; + break; + case CL_INVALID_DEVICE: + err_msg = "CL_INVALID_DEVICE"; + break; + case CL_INVALID_CONTEXT: + err_msg = "CL_INVALID_CONTEXT"; + break; + case CL_INVALID_QUEUE_PROPERTIES: + err_msg = "CL_INVALID_QUEUE_PROPERTIES"; + break; + case CL_INVALID_COMMAND_QUEUE: + err_msg = "CL_INVALID_COMMAND_QUEUE"; + break; + case CL_INVALID_HOST_PTR: + err_msg = "CL_INVALID_HOST_PTR"; + break; + case CL_INVALID_MEM_OBJECT: + err_msg = "CL_INVALID_MEM_OBJECT"; + break; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + err_msg = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + break; + case CL_INVALID_IMAGE_SIZE: + err_msg = "CL_INVALID_IMAGE_SIZE"; + break; + case CL_INVALID_SAMPLER: + err_msg = "CL_INVALID_SAMPLER"; + break; + case CL_INVALID_BINARY: + err_msg = "CL_INVALID_BINARY"; + break; + case CL_INVALID_BUILD_OPTIONS: + err_msg = "CL_INVALID_BUILD_OPTIONS"; + break; + case CL_INVALID_PROGRAM: + err_msg = "CL_INVALID_PROGRAM"; + break; + case CL_INVALID_PROGRAM_EXECUTABLE: + err_msg = "CL_INVALID_PROGRAM_EXECUTABLE"; + break; + case CL_INVALID_KERNEL_NAME: + err_msg = "CL_INVALID_KERNEL_NAME"; + break; + case CL_INVALID_KERNEL_DEFINITION: + err_msg = "CL_INVALID_KERNEL_DEFINITION"; + break; + case CL_INVALID_KERNEL: + err_msg = "CL_INVALID_KERNEL"; + break; + case CL_INVALID_ARG_INDEX: + err_msg = "CL_INVALID_ARG_INDEX"; + break; + case CL_INVALID_ARG_VALUE: + err_msg = "CL_INVALID_ARG_VALUE"; + break; + case CL_INVALID_ARG_SIZE: + err_msg = "CL_INVALID_ARG_SIZE"; + break; + case CL_INVALID_KERNEL_ARGS: + err_msg = "CL_INVALID_KERNEL_ARGS"; + break; + case CL_INVALID_WORK_DIMENSION: + err_msg = "CL_INVALID_WORK_DIMENSION"; + break; + case CL_INVALID_WORK_GROUP_SIZE: + err_msg = "CL_INVALID_WORK_GROUP_SIZE"; + break; + case CL_INVALID_WORK_ITEM_SIZE: + err_msg = "CL_INVALID_WORK_ITEM_SIZE"; + break; + case CL_INVALID_GLOBAL_OFFSET: + err_msg = "CL_INVALID_GLOBAL_OFFSET"; + break; + case CL_INVALID_EVENT_WAIT_LIST: + err_msg = "CL_INVALID_EVENT_WAIT_LIST"; + break; + case CL_INVALID_EVENT: + err_msg = "CL_INVALID_EVENT"; + break; + case CL_INVALID_OPERATION: + err_msg = "CL_INVALID_OPERATION"; + break; + case CL_INVALID_GL_OBJECT: + err_msg = "CL_INVALID_GL_OBJECT"; + break; + case CL_INVALID_BUFFER_SIZE: + err_msg = "CL_INVALID_BUFFER_SIZE"; + break; + case CL_INVALID_MIP_LEVEL: + err_msg = "CL_INVALID_MIP_LEVEL"; + break; + case CL_INVALID_GLOBAL_WORK_SIZE: + err_msg = "CL_INVALID_GLOBAL_WORK_SIZE"; + break; + case CL_INVALID_PROPERTY: + err_msg = "CL_INVALID_PROPERTY"; + break; + case CL_INVALID_IMAGE_DESCRIPTOR: + err_msg = "CL_INVALID_IMAGE_DESCRIPTOR"; + break; + case CL_INVALID_COMPILER_OPTIONS: + err_msg = "CL_INVALID_COMPILER_OPTIONS"; + break; + case CL_INVALID_LINKER_OPTIONS: + err_msg = "CL_INVALID_LINKER_OPTIONS"; + break; + case CL_INVALID_DEVICE_PARTITION_COUNT: + err_msg = "CL_INVALID_DEVICE_PARTITION_COUNT"; + break; + case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: + err_msg = "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; + break; + case CL_PLATFORM_NOT_FOUND_KHR: + err_msg = "CL_PLATFORM_NOT_FOUND_KHR"; + break; + } + + std::cout << err_msg << " =at= " << function_name << std::endl; + return true; +} diff --git a/src/main.cpp b/src/main.cpp index ed73f97..d8e491e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,6 +6,7 @@ #include "util.hpp" #include #include +#include "OpenCL.h" float elap_time() { static std::chrono::time_point start; @@ -43,15 +44,15 @@ void func(int id, int count, sf::Uint8* pixels) { int iteration_count = 0; int interation_threshold = 1000; - while (pow(x, 2) + pow(y, 2) < pow(2, 2) && iteration_count < interation_threshold) { - float x_temp = pow(x, 2) - pow(y, 2) + x0; + while (x*x + y*y < 4 && iteration_count < interation_threshold) { + float x_temp = x*x - y*y + x0; y = 2 * x * y + y0; x = x_temp; iteration_count++; } sf::Color c(0, 0, scale(iteration_count, 0, 1000, 0, 255), 255); - int val = scale(iteration_count, 0, 1000, 0, pow(2, 24)); + int val = scale(iteration_count, 0, 1000, 0, 16777216); pixels[(pixel_y * WINDOW_X + pixel_x) * 4 + 0] = val & 0xff; pixels[(pixel_y * WINDOW_X + pixel_x) * 4 + 1] = (val >> 8) & 0xff; @@ -61,6 +62,8 @@ void func(int id, int count, sf::Uint8* pixels) { } } +enum Mouse_State {PRESSED, DEPRESSED}; + int main() { std::mt19937 rng(time(NULL)); @@ -69,13 +72,19 @@ int main() { sf::RenderWindow window(sf::VideoMode(WINDOW_X, WINDOW_Y), "quick-sfml-template"); window.setFramerateLimit(60); - float physic_step = 0.166f; float physic_time = 0.0f; double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0; fps_counter fps; + OpenCL cl(sf::Vector2i(WINDOW_X, WINDOW_Y)); + cl.init(); + + + + + sf::Uint8 *pixels = new sf::Uint8[WINDOW_X * WINDOW_Y * 4]; sf::Sprite viewport_sprite;