I guess there is no way to enforce parallelism at the GraphRuntime level. Parallelism only exists inside the module (i.e. inside the compiled kernels); the GraphRuntime itself is designed to realize heterogeneous execution.

```
Module GraphRuntimeCreate(const std::string& sym_json,
                          const tvm::runtime::Module& m,
                          const std::vector<TVMContext>& ctxs) {
  auto exec = make_object<GraphRuntime>();
  exec->Init(sym_json, m, ctxs);
  return Module(exec);
}
```

I guess the json is the compute graph, the module holds the packed functions used by the compute graph, and the contexts tell the runtime which device API to pick.
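For completeness, here is a minimal sketch of how those three pieces could be supplied from C++. I am assuming the factory is registered in the global registry under the name `tvm.graph_runtime.create` (as in the TVM version these snippets come from) and that the packed-function interface flattens each TVMContext into a (device_type, device_id) pair; treat the names as illustrative rather than authoritative.

```
#include <stdexcept>
#include <string>

#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

// Sketch: create a GraphRuntime from the graph json, the compiled library and
// one context. The global name "tvm.graph_runtime.create" and the flattened
// (device_type, device_id) calling convention are assumptions.
tvm::runtime::Module CreateGraphRuntime(const std::string& graph_json,
                                        tvm::runtime::Module lib,
                                        TVMContext ctx) {
  const tvm::runtime::PackedFunc* create =
      tvm::runtime::Registry::Get("tvm.graph_runtime.create");
  if (create == nullptr) {
    throw std::runtime_error("tvm.graph_runtime.create is not registered");
  }
  // json and lib come first, then each context as a (device_type, device_id) pair.
  return (*create)(graph_json, lib,
                   static_cast<int>(ctx.device_type), ctx.device_id);
}
```

That global function presumably just parses its arguments and forwards to the GraphRuntimeCreate shown above.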
CreateTVMOp then wraps each node of that graph into a closure that gets stored in op_execs_:

```
std::pair<std::function<void()>, std::shared_ptr<GraphRuntime::OpArgs> > GraphRuntime::CreateTVMOp(
    const TVMOpParam& param, const std::vector<DLTensor>& args, size_t num_inputs) {
  std::shared_ptr<GraphRuntime::OpArgs> arg_ptr = std::make_shared<GraphRuntime::OpArgs>();
  // setup address.
  arg_ptr->args = args;
  if (param.flatten_data) {
    arg_ptr->shape_data.resize(arg_ptr->args.size());
  }
  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
    TVMValue v;
    DLTensor* t = &arg_ptr->args[i];
    v.v_handle = t;
    arg_ptr->arg_values.push_back(v);
    arg_ptr->arg_tcodes.push_back(kTVMDLTensorHandle);
    if (param.flatten_data) {
      arg_ptr->shape_data[i] = std::accumulate(
          t->shape, t->shape + t->ndim, 1, std::multiplies<int64_t>());
      t->ndim = 1;
      t->shape = &(arg_ptr->shape_data[i]);
    }
  }

  if (param.func_name == "__nop") {
    return {[](){}, arg_ptr};
  } else if (param.func_name == "__copy") {
    // Perform cross device data copy.
    // Directly copy data from the input to the output.
    auto fexec = [arg_ptr]() {
      DLTensor* from = static_cast<DLTensor*>(arg_ptr->arg_values[0].v_handle);
      DLTensor* to = static_cast<DLTensor*>(arg_ptr->arg_values[1].v_handle);
      TVM_CCALL(TVMArrayCopyFromTo(from, to, nullptr));
    };
    return {fexec, arg_ptr};
  }

  // Get compiled function from the module that contains both host and device
  // code.
  tvm::runtime::PackedFunc pf = module_.GetFunction(param.func_name, true);
  CHECK(pf != nullptr) << "no such function in module: " << param.func_name;

  auto fexec = [arg_ptr, pf]() {
    TVMRetValue rv;
    TVMArgs targs(arg_ptr->arg_values.data(),
                  arg_ptr->arg_tcodes.data(),
                  static_cast<int>(arg_ptr->arg_values.size()));
    pf.CallPacked(targs, &rv);
  };
  return {fexec, arg_ptr};
}
```

There is no strategy for enforcing parallelism across op_execs_; the runtime just does what the graph json tells it to.
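The execution loop makes this explicit. Below is a paraphrase of GraphRuntime::Run() (a minimal sketch; the exact body may differ slightly across TVM versions): it simply invokes the closures produced by CreateTVMOp one after another, in the order laid down by the graph json.

```
void GraphRuntime::Run() {
  // Execute the cached closures sequentially; there is no scheduling or
  // inter-op parallelism here, only the order the graph json dictates.
  for (size_t i = 0; i < op_execs_.size(); ++i) {
    if (op_execs_[i]) op_execs_[i]();
  }
}
```

So any parallelism has to come from inside the individual kernels (e.g. a thread pool used by a parallel schedule), not from the graph executor.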