// cmte/kompute-example
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include "kompute/Core.hpp"
#include <kompute/Kompute.hpp>
#include <spdlog/spdlog.h>

/**
 * Compiles GLSL compute shader source to SPIR-V by shelling out to
 * `glslangValidator`.
 *
 * The source is written to `tmp_kp_shader.comp` in the current working
 * directory and the compiler output is read back from
 * `tmp_kp_shader.comp.spv`. Both files are left in place afterwards.
 *
 * @param source GLSL shader text.
 * @return The compiled SPIR-V module as 32-bit words.
 * @throws std::runtime_error if the source file cannot be written, the
 *         compiler command returns a non-zero status, or the output file is
 *         missing, empty, or not a whole number of 32-bit words.
 */
static std::vector<uint32_t> compileSource(const std::string& source) {
        {
                std::ofstream file_out("tmp_kp_shader.comp");
                if (!file_out) {
                        throw std::runtime_error("compileSource: cannot open tmp_kp_shader.comp for writing");
                }
                file_out << source;
                file_out.close();
                if (!file_out) {
                        throw std::runtime_error("compileSource: failed writing tmp_kp_shader.comp");
                }
        }

        // A non-zero status covers both "compiler not found" and "shader did
        // not compile"; without this check a stale .spv from an earlier run
        // could be returned as if it were the result of this call.
        const int status = std::system("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv");
        if (status != 0) {
                throw std::runtime_error("compileSource: glslangValidator failed (exit status " +
                                         std::to_string(status) + ")");
        }

        std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
        if (!fileStream) {
                throw std::runtime_error("compileSource: cannot open tmp_kp_shader.comp.spv");
        }
        const std::vector<char> buffer((std::istreambuf_iterator<char>(fileStream)),
                                       std::istreambuf_iterator<char>());

        // The result is a sequence of 32-bit words; a partial trailing word
        // cannot be represented in the return type.
        if (buffer.empty() || buffer.size() % sizeof(uint32_t) != 0) {
                throw std::runtime_error("compileSource: tmp_kp_shader.comp.spv is empty or truncated");
        }

        // Copy bytes into properly typed storage instead of reinterpreting the
        // char buffer through a uint32_t pointer.
        std::vector<uint32_t> spirv(buffer.size() / sizeof(uint32_t));
        std::memcpy(spirv.data(), buffer.data(), buffer.size());
        return spirv;
}

/**
 * Runs a compute shader through Kompute and prints the two output tensors.
 *
 * Creates two float input tensors and two uint32_t output tensors, builds an
 * algorithm from the compiled shader with one specialization constant and one
 * push constant, dispatches it twice (the second time with `pushConstsB`),
 * then runs OpTensorSyncLocal on all tensors and prints the outputs to stdout.
 *
 * @param shader GLSL compute shader source; it is compiled via compileSource().
 */
void kompute(const std::string &shader) {
        kp::Manager mgr;

        auto tensor_in_a = mgr.tensor({2., 2., 2.});
        auto tensor_in_b = mgr.tensor({1., 2., 3.});

        auto tensor_out_a = mgr.tensorT<uint32_t>({0, 0, 0});
        auto tensor_out_b = mgr.tensorT<uint32_t>({0, 0, 0});

        // Order matters: the position in this list is presumably the binding
        // index the shader sees (0..3) — confirm against the Kompute docs.
        std::vector<std::shared_ptr<kp::Tensor>> params = {
                tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b };

        kp::Workgroup workgroup({3, 1, 1});
        std::vector<float> specConsts{ 2.0f };
        std::vector<float> pushConstsA{ 2.0f };
        std::vector<float> pushConstsB{ 2.0f };

        auto algorithm = mgr.algorithm(params,
                                       compileSource(shader),
                                       workgroup,
                                       specConsts,
                                       pushConstsA);

        // First evaluation: sync tensors to the device and dispatch with the
        // algorithm's own push constants. Second evaluation: dispatch again,
        // overriding the push constants with pushConstsB.
        mgr.sequence()
                ->record<kp::OpTensorSyncDevice>(params)
                ->record<kp::OpAlgoDispatch>(algorithm)
                ->eval()
                ->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
                ->eval();

        // Run OpTensorSyncLocal asynchronously on a fresh sequence and wait
        // for it to finish before reading the tensors.
        auto sq = mgr.sequence();
        sq->evalAsync<kp::OpTensorSyncLocal>(params);
        sq->evalAwait();

        // Iterate with the tensor's own element type; forcing each element
        // through `const float&` would convert it and lose precision for
        // large values.
        fmt::print("Result A:\n");
        for (const auto elem : tensor_out_a->vector()) {
                std::cout << elem << " ";
        }

        fmt::print("\nResult B:\n");
        for (const auto elem : tensor_out_b->vector()) {
                std::cout << elem << " ";
        }

        // Terminate the last line so the shell prompt does not run into it.
        std::cout << '\n';
}

/**
 * Entry point: defines the example compute shader inline and runs it through
 * kompute().
 *
 * The two output buffers are declared `uint` in the shader because kompute()
 * creates them with `tensorT<uint32_t>` and the shader body writes `uint(...)`
 * values; declaring them `float` makes the host read float bit patterns as
 * integers.
 *
 * @return 0 on success, 1 if kompute() throws a std::exception.
 */
int main(int /*argc*/, char ** /*argv*/) {
  const std::string shader = (R"(
                #version 450
                layout (local_size_x = 1) in;
                layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
                layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
                layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
                layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };

                layout(push_constant) uniform PushConstants {
                        float val;
                }push_const;

                layout (constant_id = 0) const float const_one = 0;

                void main() {
                        uint index = gl_GlobalInvocationID.x;
                        out_a[index] += uint( in_a[index] * in_b[index] );
                        out_b[index] += uint( const_one * push_const.val );

                }
        )");

        // Report failures with a message instead of letting an uncaught
        // exception terminate the process.
        try {
                kompute(shader);
        } catch (const std::exception& e) {
                std::cerr << "kompute example failed: " << e.what() << '\n';
                return 1;
        }
        return 0;
}