import logging

import numpy as np
import onnx

import tvm
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.op.contrib.tensorrt import (
    partition_for_tensorrt,
    get_tensorrt_version,
    is_tensorrt_runtime_enabled,
    get_tensorrt_target,
)
######################################################################
# Load pretrained ONNX model
# ---------------------------------------------

# Sanity checks: confirm that this TVM build ships the TensorRT runtime and
# report which TensorRT target/version it was built against.
print(is_tensorrt_runtime_enabled())
print(get_tensorrt_target())
print(get_tensorrt_version())

model_path = "models/yolov5s.v5.onnx"
print(tvm.__file__)  # which TVM installation is actually in use
logging.basicConfig(level=logging.DEBUG)
onnx_model = onnx.load(model_path)

# Compile the model with Relay.
BATCH_SIZE = 1
input_shape = (BATCH_SIZE, 3, 640, 640)

input_name = "images"
dtype = "float16"

shape_dict = {input_name: input_shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
mod = relay.transform.InferType()(mod)
# Partition supported subgraphs so they run through TensorRT; anything
# unsupported falls back to TVM's own CUDA backend. Passing params lets the
# partitioner fold constants into the TensorRT subgraphs.
mod = partition_for_tensorrt(mod, params)
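
# A minimal optional sketch: count how much of the graph was actually handed
# to TensorRT. Partitioned subgraphs are global functions annotated with
# Compiler="tensorrt"; if this prints zero, everything is running through
# plain CUDA codegen and no TensorRT speedup can be expected.
trt_funcs = [
    gv.name_hint
    for gv in mod.get_global_vars()
    if mod[gv].attrs is not None and mod[gv].attrs.get("Compiler") == "tensorrt"
]
print(f"{len(trt_funcs)} subgraph(s) offloaded to TensorRT: {trt_funcs}")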

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="cuda", params=params)
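
# Optional deployment step (a sketch; the file name is illustrative): export
# the compiled artifact once so it can be reloaded later without recompiling.
# Reloading requires a TVM runtime built with TensorRT support enabled.
lib.export_library("yolov5s_trt.so")
loaded_lib = tvm.runtime.load_module("yolov5s_trt.so")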

dev = tvm.cuda(0)

module_exec = graph_executor.GraphModule(lib["default"](dev))

x_data = np.random.uniform(-1, 1, input_shape).astype(dtype)
module_exec.set_input(input_name, x_data)
# number=1, repeat=1 yields a single, noisy sample; raise repeat (and use
# min_repeat_ms) for a stable latency estimate.
print(module_exec.benchmark(dev, number=1, repeat=1))
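
# Beyond timing, run one actual inference and pull the raw prediction tensor
# back to host memory (a sketch; decoding YOLOv5 boxes and applying NMS is
# model-specific and not shown here).
module_exec.run()
output = module_exec.get_output(0).numpy()
print("output shape:", output.shape)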
