vor 2 Jahren · a88620b92b
--- a/main.cpp
+++ b/main.cpp
@@ -19,78 +19,94 @@ static std::vector<uint32_t> compileSource(const std::string& source) {
 
				         return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
			
 
				 
			
 
				 }
			
 
				-
			
 
				-void kompute(const std::string &shader) {
			
 
				-        kp::Manager mgr;
			
 
				-        
			
 
				-        auto tensor_in_a = mgr.tensor({2., 2., 2.});
			
 
				-        auto tensor_in_b = mgr.tensor({1., 2., 3.});
			
 
				-
			
 
				-        auto tensor_out_a = mgr.tensorT<uint32_t>({0, 0, 0});
			
 
				-        auto tensor_out_b = mgr.tensorT<uint32_t>({0, 0, 0});
			
 
				-
			
 
				-        std::vector<std::shared_ptr<kp::Tensor>> params = {
			
 
				-                tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b };
			
 
				-
			
 
				-        kp::Workgroup workgroup({3, 1, 1});
			
 
				-        std::vector<float> specConsts ({ 2 });
			
 
				-        std::vector<float> pushConstsA ({ 2.0 });
			
 
				-        std::vector<float> pushConstsB ({ 2.0 });
			
 
				-
			
 
				-        auto algorith = mgr.algorithm(params,
			
 
				-                                      compileSource(shader),
			
 
				-                                      workgroup, 
			
 
				-                                      specConsts,
			
 
				-                                      pushConstsA);
			
 
				-
			
 
				-        mgr.sequence()
			
 
				-                ->record<kp::OpTensorSyncDevice>(params)
			
 
				-                ->record<kp::OpAlgoDispatch>(algorith)
			
 
				-                ->eval()
			
 
				-                ->record<kp::OpAlgoDispatch>(algorith, pushConstsB)
			
 
				-                ->eval();
			
 
				-
			
 
				-        auto sq = mgr.sequence();
			
 
				-        sq->evalAsync<kp::OpTensorSyncLocal>(params);
			
 
				-        sq->evalAwait();
			
 
				-
			
 
				-        fmt::print("Result A:\n");
			
 
				-        for (const float& elem: tensor_out_a->vector()) {
			
 
				-                //fmt::print("{} ", elem);
			
 
				-                std::cout << elem << " ";
			
 
				-        }
			
 
				+void kompute(const std::string& shader) {
			
 
				+
			
 
				+    // 1. Create Kompute Manager with default settings (device 0, first queue and no extensions)
			
 
				+    kp::Manager mgr;
			
 
				+
			
 
				+    // 2. Create and initialise Kompute Tensors through manager
			
 
				+
			
 
				+    // Default tensor constructor simplifies creation of float values
			
 
				+    auto tensorInA = mgr.tensor({ 2., 2., 2. });
			
 
				+    auto tensorInB = mgr.tensor({ 1., 2., 3. });
			
 
				+    // Explicit type constructor supports uint32, int32, double, float and bool
			
 
				+    auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
			
 
				+    auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
			
 
				+
			
 
				+    std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
			
 
				+
			
 
				+    // 3. Create algorithm based on shader (supports buffers & push/spec constants)
			
 
				+    kp::Workgroup workgroup({3, 1, 1});
			
 
				+    std::vector<float> specConsts({ 2 });
			
 
				+    std::vector<float> pushConstsA({ 2.0 });
			
 
				+    std::vector<float> pushConstsB({ 3.0 });
			
 
				+
			
 
				+    auto algorithm = mgr.algorithm(params,
			
 
				+                                   // See documentation shader section for compileSource
			
 
				+                                   compileSource(shader),
			
 
				+                                   workgroup,
			
 
				+                                   specConsts,
			
 
				+                                   pushConstsA);
			
 
				+
			
 
				+    // 4. Run operation synchronously using sequence
			
 
				+    mgr.sequence()
			
 
				+        ->record<kp::OpTensorSyncDevice>(params)
			
 
				+        ->record<kp::OpAlgoDispatch>(algorithm) // Binds default push consts
			
 
				+        ->eval(); // Evaluates the two recorded operations
			
 
				+        //->record<kp::OpAlgoDispatch>(algorithm, pushConstsB) // Overrides push consts
			
 
				+        //->eval(); // Evaluates only last recorded operation
			
 
				+
			
 
				+    // 5. Sync results from the GPU asynchronously
			
 
				+    auto sq = mgr.sequence();
			
 
				+    sq->evalAsync<kp::OpTensorSyncLocal>(params);
			
 
				+
			
 
				+    // ... Do other work asynchronously whilst GPU finishes
			
 
				+
			
 
				+    sq->evalAwait();
			
 
				+
			
 
				+    // Prints the first output which is: { 4, 8, 12 }
			
 
				+    for (const float& elem : tensorOutA->vector()) std::cout << elem << "  ";
			
 
				+    std::cout << "\n";
			
 
				+    // Prints the second output which is: { 10, 10, 10 }
			
 
				+    for (const float& elem : tensorOutB->vector()) std::cout << elem << "  ";
			
 
				+
			
 
				+    std::cout << "\n";
			
 
				+} // Manages / releases all CPU and GPU memory resources
			
 
				+
			
 
				+int main() {
			
 
				+
			
 
				+    // Define a raw string shader (or use the Kompute tools to compile to SPIRV / C++ header
			
 
				+    // files). This shader shows some of the main components including constants, buffers, etc
			
 
				+    std::string shader = (R"(
			
 
				+        #version 450
			
 
				+
			
 
				+        layout (local_size_x = 1) in;
			
 
				+
			
 
				+        // The input tensors bind index is relative to index in parameter passed
			
 
				+        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
			
 
				+        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
			
 
				+        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
			
 
				+        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
			
 
				+
			
 
				+        // Kompute supports push constants updated on dispatch
			
 
				+        layout(push_constant) uniform PushConstants {
			
 
				+            float val;
			
 
				+        } push_const;
			
 
				+
			
 
				+        // Kompute also supports spec constants on initalization
			
 
				+        layout(constant_id = 0) const float const_one = 0;
			
 
				         
			
 
				-        fmt::print("\nResult B:\n");
			
 
				-        for (const float& elem: tensor_out_b->vector()) {
			
 
				-                //fmt::print("{} ", elem);
			
 
				-
			
 
				-                std::cout << elem << " ";
			
 
				+        //[2, 2,  2]
			
 
				+        //[1, 2,  3]
			
 
				+        //[4, 8, 10]
			
 
				+
			
 
				+        void main() {
			
 
				+            uint index = gl_GlobalInvocationID.x;
			
 
				+            out_a[index] += uint( in_a[index] * in_b[index] );
			
 
				+            out_b[index] += uint(push_const.val);
			
 
				         }
			
 
				+    )");
			
 
				 
			
 
				-}
			
 
				-
			
 
				-int main(int argc, char **argv) {
			
 
				-  std::string shader = (R"(
			
 
				-                #version 450
			
 
				-                layout (local_size_x = 1) in;
			
 
				-                layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
			
 
				-                layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
			
 
				-                layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
			
 
				-                layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
			
 
				-
			
 
				-                layout(push_constant) uniform PushConstants {
			
 
				-                        float val;
			
 
				-                }push_const;
			
 
				-
			
 
				-                layout (constant_id = 0) const float const_one = 0;
			
 
				-
			
 
				-                void main() {
			
 
				-                        uint index = gl_GlobalInvocationID.x;
			
 
				-                        out_a[index] += uint( in_a[index] * in_b[index] );
			
 
				-                        out_b[index] += uint( const_one * push_const.val );
			
 
				-
			
 
				-                }
			
 
				-        )");
			
 
				-
			
 
				-        kompute(shader);
			
 
				+    // Run the function declared above with our raw string shader
			
 
				+    kompute(shader);
			
 
				 }