#include "kompute/Core.hpp"
// NOTE(review): the names of the remaining includes were lost from the original
// file; the list below is reconstructed from what the code actually uses.
// Confirm the Kompute umbrella header path against the project's include dirs.
#include "kompute/Kompute.hpp"

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

/**
 * Compiles a GLSL compute shader to SPIR-V by shelling out to glslangValidator.
 *
 * The source is written to "tmp_kp_shader.comp" in the current working
 * directory, compiled to "tmp_kp_shader.comp.spv", and the resulting binary is
 * read back as a sequence of 32-bit words. Both temporary files are left on
 * disk.
 *
 * @param source GLSL compute shader source code.
 * @return The compiled SPIR-V module as 32-bit words.
 * @throws std::runtime_error if a temporary file cannot be written or read,
 *         if glslangValidator exits with a non-zero status, or if the output
 *         is empty or not a whole number of 32-bit words.
 */
static std::vector<uint32_t>
compileSource(const std::string& source)
{
    {
        std::ofstream shaderFile("tmp_kp_shader.comp");
        shaderFile << source;
        shaderFile.close();
        // close() sets failbit on error, so this also catches a failed open/write.
        if (!shaderFile) {
            throw std::runtime_error("Unable to write tmp_kp_shader.comp");
        }
    }

    // The command line is a fixed literal; no caller-supplied text reaches the shell.
    if (std::system("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv") != 0) {
        throw std::runtime_error("Error running glslangValidator command");
    }

    std::ifstream spirvFile("tmp_kp_shader.comp.spv", std::ios::binary);
    if (!spirvFile) {
        throw std::runtime_error("Unable to open tmp_kp_shader.comp.spv");
    }

    std::vector<char> bytes;
    bytes.assign(std::istreambuf_iterator<char>(spirvFile),
                 std::istreambuf_iterator<char>());

    if (bytes.empty() || bytes.size() % sizeof(uint32_t) != 0) {
        throw std::runtime_error(
          "tmp_kp_shader.comp.spv is empty or not a whole number of 32-bit words");
    }

    // Copy into properly typed storage rather than reinterpreting the char
    // buffer through a cast.
    std::vector<uint32_t> words(bytes.size() / sizeof(uint32_t));
    std::memcpy(words.data(), bytes.data(), bytes.size());
    return words;
}

/**
 * Runs the given compute shader once over two small input tensors and prints
 * the two output tensors to stdout, one per line.
 *
 * @param shader GLSL compute shader source; it is compiled via compileSource().
 * @throws std::runtime_error propagated from compileSource() when shader
 *         compilation fails.
 */
void
kompute(const std::string& shader)
{
    // 1. Create Kompute Manager with default settings (device 0, first queue
    //    and no extensions)
    kp::Manager mgr;

    // 2. Create and initialise Kompute Tensors through manager

    // Default tensor constructor simplifies creation of float values
    auto tensorInA = mgr.tensor({ 2., 2., 2. });
    auto tensorInB = mgr.tensor({ 1., 2., 3. });
    // Explicit type constructor supports uint32, int32, double, float and bool
    auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
    auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });

    // The position of each tensor in this list is its binding index in the shader.
    std::vector<std::shared_ptr<kp::Tensor>> params = {
        tensorInA, tensorInB, tensorOutA, tensorOutB
    };

    // 3. Create algorithm based on shader (supports buffers & push/spec constants)
    kp::Workgroup workgroup({ 3, 1, 1 });
    std::vector<float> specConsts({ 2 });
    std::vector<float> pushConstsA({ 2.0 });
    // Only used by the optional second dispatch that is commented out below.
    std::vector<float> pushConstsB({ 3.0 });

    auto algorithm = mgr.algorithm(params,
                                   compileSource(shader),
                                   workgroup,
                                   specConsts,
                                   pushConstsA);

    // 4. Run operation synchronously using sequence
    mgr.sequence()
      ->record<kp::OpTensorSyncDevice>(params)
      ->record<kp::OpAlgoDispatch>(algorithm) // Binds default push consts
      ->eval();                               // Evaluates the two recorded operations
      //->record<kp::OpAlgoDispatch>(algorithm, pushConstsB) // Overrides push consts
      //->eval(); // Evaluates only last recorded operation

    // 5. Sync results from the GPU asynchronously
    auto sq = mgr.sequence();
    sq->evalAsync<kp::OpTensorSyncLocal>(params);

    // ... Do other work asynchronously whilst GPU finishes

    sq->evalAwait();

    // With the single dispatch above, out_a[i] = in_a[i] * in_b[i], so the
    // expected first output is: { 2, 4, 6 }
    // (enabling the second dispatch would accumulate to { 4, 8, 12 }).
    for (const uint32_t elem : tensorOutA->vector()) {
        std::cout << elem << " ";
    }
    std::cout << "\n";

    // With the single dispatch above, out_b[i] = uint(push constant 2.0), so
    // the expected second output is: { 2, 2, 2 }
    // (enabling the second dispatch would accumulate to { 5, 5, 5 }).
    for (const uint32_t elem : tensorOutB->vector()) {
        std::cout << elem << " ";
    }
    std::cout << "\n";
} // Manages / releases all CPU and GPU memory resources

/**
 * Entry point: defines the example shader inline and runs it.
 *
 * @return EXIT_SUCCESS on success; EXIT_FAILURE if a std::exception is thrown
 *         while running the example (the message is written to stderr).
 */
int
main()
{
    // Define a raw string shader (or use the Kompute tools to compile to
    // SPIRV / C++ header files). This shader shows some of the main components
    // including constants, buffers, etc
    const std::string shader = (R"(
        #version 450

        layout (local_size_x = 1) in;

        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };

        // Kompute supports push constants updated on dispatch
        layout(push_constant) uniform PushConstants {
            float val;
        } push_const;

        // Kompute also supports spec constants on initalization
        layout(constant_id = 0) const float const_one = 0;

        //[2, 2, 2]
        //[1, 2, 3]
        //[4, 8, 10]
        void main() {
            uint index = gl_GlobalInvocationID.x;
            out_a[index] += uint( in_a[index] * in_b[index] );
            out_b[index] += uint(push_const.val);
        }
    )");

    // Run the function declared above with our raw string shader
    try {
        kompute(shader);
    } catch (const std::exception& err) {
        std::cerr << "kompute example failed: " << err.what() << "\n";
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}