main.cpp 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  #include "kompute/Core.hpp"

  #include <cstdint>
  #include <cstdlib>
  #include <cstring>
  #include <fstream>
  #include <iostream>
  #include <iterator>
  #include <memory>
  #include <stdexcept>
  #include <string>
  #include <vector>

  #include <kompute/Kompute.hpp>
  #include <spdlog/spdlog.h>
  /**
   * Compiles GLSL compute shader source to SPIR-V by shelling out to the
   * `glslangValidator` command-line tool.
   *
   * The source is written to `tmp_kp_shader.comp` in the current working
   * directory, compiled into `tmp_kp_shader.comp.spv`, and the resulting binary
   * is read back. Both temporary files are left on disk.
   *
   * @param source GLSL compute shader text.
   * @return The compiled SPIR-V module as a sequence of 32-bit words.
   * @throws std::runtime_error if a temporary file cannot be written or read,
   *         if the compiler command reports failure, or if the produced binary
   *         is empty or not a whole number of 32-bit words.
   */
  static std::vector<uint32_t> compileSource(const std::string& source) {
      const std::string glslPath = "tmp_kp_shader.comp";
      const std::string spirvPath = "tmp_kp_shader.comp.spv";

      {
          std::ofstream glslFile(glslPath);
          if (!glslFile) {
              throw std::runtime_error("compileSource: cannot open " + glslPath + " for writing");
          }
          glslFile << source;
          glslFile.close();
          // close() sets failbit if the final flush fails, so check again.
          if (!glslFile) {
              throw std::runtime_error("compileSource: failed to write " + glslPath);
          }
      }

      // A non-zero status covers both "compiler not installed" and "shader did
      // not compile"; without this check a stale .spv from an earlier run could
      // be picked up silently.
      const std::string command = "glslangValidator -V " + glslPath + " -o " + spirvPath;
      if (std::system(command.c_str()) != 0) {
          throw std::runtime_error("compileSource: command failed: " + command);
      }

      std::ifstream spirvFile(spirvPath, std::ios::binary);
      if (!spirvFile) {
          throw std::runtime_error("compileSource: cannot open " + spirvPath + " for reading");
      }
      const std::vector<char> bytes((std::istreambuf_iterator<char>(spirvFile)),
                                    std::istreambuf_iterator<char>());
      if (bytes.empty() || bytes.size() % sizeof(uint32_t) != 0) {
          throw std::runtime_error("compileSource: " + spirvPath +
                                   " is empty or not a whole number of 32-bit words");
      }

      // Copy into properly typed storage rather than reinterpreting the char
      // buffer as uint32_t*, which would violate aliasing/alignment rules.
      std::vector<uint32_t> words(bytes.size() / sizeof(uint32_t));
      std::memcpy(words.data(), bytes.data(), bytes.size());
      return words;
  }
  18. void kompute(const std::string& shader) {
  19. // 1. Create Kompute Manager with default settings (device 0, first queue and no extensions)
  20. kp::Manager mgr;
  21. // 2. Create and initialise Kompute Tensors through manager
  22. // Default tensor constructor simplifies creation of float values
  23. auto tensorInA = mgr.tensor({ 2., 2., 2. });
  24. auto tensorInB = mgr.tensor({ 1., 2., 3. });
  25. // Explicit type constructor supports uint32, int32, double, float and bool
  26. auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
  27. auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
  28. std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
  29. // 3. Create algorithm based on shader (supports buffers & push/spec constants)
  30. kp::Workgroup workgroup({3, 1, 1});
  31. std::vector<float> specConsts({ 2 });
  32. std::vector<float> pushConstsA({ 2.0 });
  33. std::vector<float> pushConstsB({ 3.0 });
  34. auto algorithm = mgr.algorithm(params,
  35. // See documentation shader section for compileSource
  36. compileSource(shader),
  37. workgroup,
  38. specConsts,
  39. pushConstsA);
  40. // 4. Run operation synchronously using sequence
  41. mgr.sequence()
  42. ->record<kp::OpTensorSyncDevice>(params)
  43. ->record<kp::OpAlgoDispatch>(algorithm) // Binds default push consts
  44. ->eval(); // Evaluates the two recorded operations
  45. //->record<kp::OpAlgoDispatch>(algorithm, pushConstsB) // Overrides push consts
  46. //->eval(); // Evaluates only last recorded operation
  47. // 5. Sync results from the GPU asynchronously
  48. auto sq = mgr.sequence();
  49. sq->evalAsync<kp::OpTensorSyncLocal>(params);
  50. // ... Do other work asynchronously whilst GPU finishes
  51. sq->evalAwait();
  52. // Prints the first output which is: { 4, 8, 12 }
  53. for (const float& elem : tensorOutA->vector()) std::cout << elem << " ";
  54. std::cout << "\n";
  55. // Prints the second output which is: { 10, 10, 10 }
  56. for (const float& elem : tensorOutB->vector()) std::cout << elem << " ";
  57. std::cout << "\n";
  58. } // Manages / releases all CPU and GPU memory resources
  59. int main() {
  60. // Define a raw string shader (or use the Kompute tools to compile to SPIRV / C++ header
  61. // files). This shader shows some of the main components including constants, buffers, etc
  62. std::string shader = (R"(
  63. #version 450
  64. layout (local_size_x = 1) in;
  65. // The input tensors bind index is relative to index in parameter passed
  66. layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
  67. layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
  68. layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
  69. layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
  70. // Kompute supports push constants updated on dispatch
  71. layout(push_constant) uniform PushConstants {
  72. float val;
  73. } push_const;
  74. // Kompute also supports spec constants on initalization
  75. layout(constant_id = 0) const float const_one = 0;
  76. //[2, 2, 2]
  77. //[1, 2, 3]
  78. //[4, 8, 10]
  79. void main() {
  80. uint index = gl_GlobalInvocationID.x;
  81. out_a[index] += uint( in_a[index] * in_b[index] );
  82. out_b[index] += uint(push_const.val);
  83. }
  84. )");
  85. // Run the function declared above with our raw string shader
  86. kompute(shader);
  87. }