|
@@ -19,78 +19,94 @@ static std::vector<uint32_t> compileSource(const std::string& source) {
|
|
|
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
|
|
|
|
|
|
}
|
|
|
-
|
|
|
-void kompute(const std::string &shader) {
|
|
|
- kp::Manager mgr;
|
|
|
-
|
|
|
- auto tensor_in_a = mgr.tensor({2., 2., 2.});
|
|
|
- auto tensor_in_b = mgr.tensor({1., 2., 3.});
|
|
|
-
|
|
|
- auto tensor_out_a = mgr.tensorT<uint32_t>({0, 0, 0});
|
|
|
- auto tensor_out_b = mgr.tensorT<uint32_t>({0, 0, 0});
|
|
|
-
|
|
|
- std::vector<std::shared_ptr<kp::Tensor>> params = {
|
|
|
- tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b };
|
|
|
-
|
|
|
- kp::Workgroup workgroup({3, 1, 1});
|
|
|
- std::vector<float> specConsts ({ 2 });
|
|
|
- std::vector<float> pushConstsA ({ 2.0 });
|
|
|
- std::vector<float> pushConstsB ({ 2.0 });
|
|
|
-
|
|
|
- auto algorith = mgr.algorithm(params,
|
|
|
- compileSource(shader),
|
|
|
- workgroup,
|
|
|
- specConsts,
|
|
|
- pushConstsA);
|
|
|
-
|
|
|
- mgr.sequence()
|
|
|
- ->record<kp::OpTensorSyncDevice>(params)
|
|
|
- ->record<kp::OpAlgoDispatch>(algorith)
|
|
|
- ->eval()
|
|
|
- ->record<kp::OpAlgoDispatch>(algorith, pushConstsB)
|
|
|
- ->eval();
|
|
|
-
|
|
|
- auto sq = mgr.sequence();
|
|
|
- sq->evalAsync<kp::OpTensorSyncLocal>(params);
|
|
|
- sq->evalAwait();
|
|
|
-
|
|
|
- fmt::print("Result A:\n");
|
|
|
- for (const float& elem: tensor_out_a->vector()) {
|
|
|
- //fmt::print("{} ", elem);
|
|
|
- std::cout << elem << " ";
|
|
|
- }
|
|
|
+void kompute(const std::string& shader) {
|
|
|
+
|
|
|
+ // 1. Create Kompute Manager with default settings (device 0, first queue and no extensions)
|
|
|
+ kp::Manager mgr;
|
|
|
+
|
|
|
+ // 2. Create and initialise Kompute Tensors through manager
|
|
|
+
|
|
|
+ // Default tensor constructor simplifies creation of float values
|
|
|
+ auto tensorInA = mgr.tensor({ 2., 2., 2. });
|
|
|
+ auto tensorInB = mgr.tensor({ 1., 2., 3. });
|
|
|
+ // Explicit type constructor supports uint32, int32, double, float and bool
|
|
|
+ auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
|
|
+ auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
|
|
+
|
|
|
+ std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
|
|
|
+
|
|
|
+ // 3. Create algorithm based on shader (supports buffers & push/spec constants)
|
|
|
+ kp::Workgroup workgroup({3, 1, 1});
|
|
|
+ std::vector<float> specConsts({ 2 });
|
|
|
+ std::vector<float> pushConstsA({ 2.0 });
|
|
|
+ std::vector<float> pushConstsB({ 3.0 });
|
|
|
+
|
|
|
+ auto algorithm = mgr.algorithm(params,
|
|
|
+ // See documentation shader section for compileSource
|
|
|
+ compileSource(shader),
|
|
|
+ workgroup,
|
|
|
+ specConsts,
|
|
|
+ pushConstsA);
|
|
|
+
|
|
|
+ // 4. Run operation synchronously using sequence
|
|
|
+ mgr.sequence()
|
|
|
+ ->record<kp::OpTensorSyncDevice>(params)
|
|
|
+ ->record<kp::OpAlgoDispatch>(algorithm) // Binds default push consts
|
|
|
+ ->eval(); // Evaluates the two recorded operations
|
|
|
+ //->record<kp::OpAlgoDispatch>(algorithm, pushConstsB) // Overrides push consts
|
|
|
+ //->eval(); // Evaluates only last recorded operation
|
|
|
+
|
|
|
+ // 5. Sync results from the GPU asynchronously
|
|
|
+ auto sq = mgr.sequence();
|
|
|
+ sq->evalAsync<kp::OpTensorSyncLocal>(params);
|
|
|
+
|
|
|
+ // ... Do other work asynchronously whilst GPU finishes
|
|
|
+
|
|
|
+ sq->evalAwait();
|
|
|
+
|
|
|
+ // Prints the first output which is: { 4, 8, 12 }
|
|
|
+ for (const float& elem : tensorOutA->vector()) std::cout << elem << " ";
|
|
|
+ std::cout << "\n";
|
|
|
+ // Prints the second output which is: { 10, 10, 10 }
|
|
|
+ for (const float& elem : tensorOutB->vector()) std::cout << elem << " ";
|
|
|
+
|
|
|
+ std::cout << "\n";
|
|
|
+} // Manages / releases all CPU and GPU memory resources
|
|
|
+
|
|
|
+int main() {
|
|
|
+
|
|
|
+ // Define a raw string shader (or use the Kompute tools to compile to SPIRV / C++ header
|
|
|
+ // files). This shader shows some of the main components including constants, buffers, etc
|
|
|
+ std::string shader = (R"(
|
|
|
+ #version 450
|
|
|
+
|
|
|
+ layout (local_size_x = 1) in;
|
|
|
+
|
|
|
+ // The input tensors bind index is relative to index in parameter passed
|
|
|
+ layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
|
|
+ layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
|
|
+ layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
|
|
|
+ layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
|
|
|
+
|
|
|
+ // Kompute supports push constants updated on dispatch
|
|
|
+ layout(push_constant) uniform PushConstants {
|
|
|
+ float val;
|
|
|
+ } push_const;
|
|
|
+
|
|
|
+ // Kompute also supports spec constants on initalization
|
|
|
+ layout(constant_id = 0) const float const_one = 0;
|
|
|
|
|
|
- fmt::print("\nResult B:\n");
|
|
|
- for (const float& elem: tensor_out_b->vector()) {
|
|
|
- //fmt::print("{} ", elem);
|
|
|
-
|
|
|
- std::cout << elem << " ";
|
|
|
+ //[2, 2, 2]
|
|
|
+ //[1, 2, 3]
|
|
|
+ //[4, 8, 10]
|
|
|
+
|
|
|
+ void main() {
|
|
|
+ uint index = gl_GlobalInvocationID.x;
|
|
|
+ out_a[index] += uint( in_a[index] * in_b[index] );
|
|
|
+ out_b[index] += uint(push_const.val);
|
|
|
}
|
|
|
+ )");
|
|
|
|
|
|
-}
|
|
|
-
|
|
|
-int main(int argc, char **argv) {
|
|
|
- std::string shader = (R"(
|
|
|
- #version 450
|
|
|
- layout (local_size_x = 1) in;
|
|
|
- layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
|
|
- layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
|
|
- layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
|
|
- layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
|
|
-
|
|
|
- layout(push_constant) uniform PushConstants {
|
|
|
- float val;
|
|
|
- }push_const;
|
|
|
-
|
|
|
- layout (constant_id = 0) const float const_one = 0;
|
|
|
-
|
|
|
- void main() {
|
|
|
- uint index = gl_GlobalInvocationID.x;
|
|
|
- out_a[index] += uint( in_a[index] * in_b[index] );
|
|
|
- out_b[index] += uint( const_one * push_const.val );
|
|
|
-
|
|
|
- }
|
|
|
- )");
|
|
|
-
|
|
|
- kompute(shader);
|
|
|
+ // Run the function declared above with our raw string shader
|
|
|
+ kompute(shader);
|
|
|
}
|