|
@@ -0,0 +1,96 @@
|
|
|
|
#include "kompute/Core.hpp"

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <kompute/Kompute.hpp>
#include <spdlog/spdlog.h>
|
|
|
|
+
|
|
|
|
/**
 * Compiles GLSL compute shader source to SPIR-V by shelling out to the
 * `glslangValidator` command-line tool.
 *
 * The source text is written to "tmp_kp_shader.comp" in the current working
 * directory, the tool is asked to produce "tmp_kp_shader.comp.spv", and that
 * binary is read back. Both files are left on disk afterwards.
 *
 * @param source GLSL compute shader text.
 * @return The compiled module as a sequence of 32-bit words.
 * @throws std::runtime_error if the source file cannot be written, the
 *         compiler command returns a non-zero status, the output cannot be
 *         opened, or the output is empty / not a whole number of 32-bit words.
 */
static std::vector<uint32_t> compileSource(const std::string& source) {
    const std::string sourcePath = "tmp_kp_shader.comp";
    const std::string binaryPath = "tmp_kp_shader.comp.spv";

    {
        std::ofstream shaderFile(sourcePath);
        shaderFile << source;
        shaderFile.close();
        // Any failure to open, write or flush leaves the stream in a failed state.
        if (!shaderFile) {
            throw std::runtime_error("compileSource: cannot write " + sourcePath);
        }
    }

    const std::string command =
        "glslangValidator -V " + sourcePath + " -o " + binaryPath;
    // A non-zero status covers both "tool not found" and "shader rejected";
    // checking it also prevents a stale .spv from a previous run being used.
    if (std::system(command.c_str()) != 0) {
        throw std::runtime_error("compileSource: command failed: " + command);
    }

    std::ifstream spirvFile(binaryPath, std::ios::binary);
    if (!spirvFile) {
        throw std::runtime_error("compileSource: cannot open " + binaryPath);
    }
    const std::vector<char> bytes((std::istreambuf_iterator<char>(spirvFile)),
                                  std::istreambuf_iterator<char>());

    if (bytes.empty() || bytes.size() % sizeof(uint32_t) != 0) {
        throw std::runtime_error("compileSource: " + binaryPath +
                                 " is empty or not a whole number of 32-bit words");
    }

    // Copy instead of reinterpreting the char buffer as uint32_t*, which would
    // rely on alignment and violate strict aliasing.
    std::vector<uint32_t> words(bytes.size() / sizeof(uint32_t));
    std::memcpy(words.data(), bytes.data(), bytes.size());
    return words;
}
|
|
|
|
+
|
|
|
|
+void kompute(const std::string &shader) {
|
|
|
|
+ kp::Manager mgr;
|
|
|
|
+
|
|
|
|
+ auto tensor_in_a = mgr.tensor({2., 2., 2.});
|
|
|
|
+ auto tensor_in_b = mgr.tensor({1., 2., 3.});
|
|
|
|
+
|
|
|
|
+ auto tensor_out_a = mgr.tensorT<uint32_t>({0, 0, 0});
|
|
|
|
+ auto tensor_out_b = mgr.tensorT<uint32_t>({0, 0, 0});
|
|
|
|
+
|
|
|
|
+ std::vector<std::shared_ptr<kp::Tensor>> params = {
|
|
|
|
+ tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b };
|
|
|
|
+
|
|
|
|
+ kp::Workgroup workgroup({3, 1, 1});
|
|
|
|
+ std::vector<float> specConsts ({ 2 });
|
|
|
|
+ std::vector<float> pushConstsA ({ 2.0 });
|
|
|
|
+ std::vector<float> pushConstsB ({ 2.0 });
|
|
|
|
+
|
|
|
|
+ auto algorith = mgr.algorithm(params,
|
|
|
|
+ compileSource(shader),
|
|
|
|
+ workgroup,
|
|
|
|
+ specConsts,
|
|
|
|
+ pushConstsA);
|
|
|
|
+
|
|
|
|
+ mgr.sequence()
|
|
|
|
+ ->record<kp::OpTensorSyncDevice>(params)
|
|
|
|
+ ->record<kp::OpAlgoDispatch>(algorith)
|
|
|
|
+ ->eval()
|
|
|
|
+ ->record<kp::OpAlgoDispatch>(algorith, pushConstsB)
|
|
|
|
+ ->eval();
|
|
|
|
+
|
|
|
|
+ auto sq = mgr.sequence();
|
|
|
|
+ sq->evalAsync<kp::OpTensorSyncLocal>(params);
|
|
|
|
+ sq->evalAwait();
|
|
|
|
+
|
|
|
|
+ fmt::print("Result A:\n");
|
|
|
|
+ for (const float& elem: tensor_out_a->vector()) {
|
|
|
|
+ //fmt::print("{} ", elem);
|
|
|
|
+ std::cout << elem << " ";
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ fmt::print("\nResult B:\n");
|
|
|
|
+ for (const float& elem: tensor_out_b->vector()) {
|
|
|
|
+ //fmt::print("{} ", elem);
|
|
|
|
+
|
|
|
|
+ std::cout << elem << " ";
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+int main(int argc, char **argv) {
|
|
|
|
+ std::string shader = (R"(
|
|
|
|
+ #version 450
|
|
|
|
+ layout (local_size_x = 1) in;
|
|
|
|
+ layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
|
|
|
+ layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
|
|
|
+ layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
|
|
|
+ layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
|
|
|
+
|
|
|
|
+ layout(push_constant) uniform PushConstants {
|
|
|
|
+ float val;
|
|
|
|
+ }push_const;
|
|
|
|
+
|
|
|
|
+ layout (constant_id = 0) const float const_one = 0;
|
|
|
|
+
|
|
|
|
+ void main() {
|
|
|
|
+ uint index = gl_GlobalInvocationID.x;
|
|
|
|
+ out_a[index] += uint( in_a[index] * in_b[index] );
|
|
|
|
+ out_b[index] += uint( const_one * push_const.val );
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ )");
|
|
|
|
+
|
|
|
|
+ kompute(shader);
|
|
|
|
+}
|