cmte
/
kompute-example


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
							#include "kompute/Core.hpp"
#include <string>
#include <vector>
#include <memory>
#include <iostream>

#include <kompute/Kompute.hpp>
#include <spdlog/spdlog.h>

static std::vector<uint32_t> compileSource(const std::string& source) {
        std::ofstream file_out("tmp_kp_shader.comp");
        file_out << source;
        file_out.close();

        system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str());
        std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
        std::vector<char> buffer;
        buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
        return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};

}
void kompute(const std::string& shader) {

    // 1. Create Kompute Manager with default settings (device 0, first queue and no extensions)
    kp::Manager mgr;

    // 2. Create and initialise Kompute Tensors through manager

    // Default tensor constructor simplifies creation of float values
    auto tensorInA = mgr.tensor({ 2., 2., 2. });
    auto tensorInB = mgr.tensor({ 1., 2., 3. });
    // Explicit type constructor supports uint32, int32, double, float and bool
    auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
    auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });

    std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};

    // 3. Create algorithm based on shader (supports buffers & push/spec constants)
    kp::Workgroup workgroup({3, 1, 1});
    std::vector<float> specConsts({ 2 });
    std::vector<float> pushConstsA({ 2.0 });
    std::vector<float> pushConstsB({ 3.0 });

    auto algorithm = mgr.algorithm(params,
                                   // See documentation shader section for compileSource
                                   compileSource(shader),
                                   workgroup,
                                   specConsts,
                                   pushConstsA);

    // 4. Run operation synchronously using sequence
    mgr.sequence()
        ->record<kp::OpTensorSyncDevice>(params)
        ->record<kp::OpAlgoDispatch>(algorithm) // Binds default push consts
        ->eval(); // Evaluates the two recorded operations
        //->record<kp::OpAlgoDispatch>(algorithm, pushConstsB) // Overrides push consts
        //->eval(); // Evaluates only last recorded operation

    // 5. Sync results from the GPU asynchronously
    auto sq = mgr.sequence();
    sq->evalAsync<kp::OpTensorSyncLocal>(params);

    // ... Do other work asynchronously whilst GPU finishes

    sq->evalAwait();

    // Prints the first output which is: { 4, 8, 12 }
    for (const float& elem : tensorOutA->vector()) std::cout << elem << "  ";
    std::cout << "\n";
    // Prints the second output which is: { 10, 10, 10 }
    for (const float& elem : tensorOutB->vector()) std::cout << elem << "  ";

    std::cout << "\n";
} // Manages / releases all CPU and GPU memory resources

int main() {

    // Define a raw string shader (or use the Kompute tools to compile to SPIRV / C++ header
    // files). This shader shows some of the main components including constants, buffers, etc
    std::string shader = (R"(
        #version 450

        layout (local_size_x = 1) in;

        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };

        // Kompute supports push constants updated on dispatch
        layout(push_constant) uniform PushConstants {
            float val;
        } push_const;

        // Kompute also supports spec constants on initalization
        layout(constant_id = 0) const float const_one = 0;
        
        //[2, 2,  2]
        //[1, 2,  3]
        //[4, 8, 10]

        void main() {
            uint index = gl_GlobalInvocationID.x;
            out_a[index] += uint( in_a[index] * in_b[index] );
            out_b[index] += uint(push_const.val);
        }
    )");

    // Run the function declared above with our raw string shader
    kompute(shader);
}