[2] Without graph tuning (a) profiler_vm ``` One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details. Name Duration (us) Percent Argument Shapes layout Hash data_layout out_layout Device kernel_layout Count fused_nn_conv2d_multiply_add_nn_relu_8 18,312.26 13.48 float32[1, 256, 14, 14], float32[256, 256, 3, 3], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 14, 14] 2cdb64071c823e24 NCHW cpu0 OIHW 6 fused_nn_conv2d_multiply_add_nn_relu_11 15,536.25 11.44 float32[1, 512, 7, 7], float32[512, 512, 3, 3], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 7, 7] ac5db8098bc41e35 NCHW cpu0 OIHW 3 fused_nn_conv2d_multiply_add_nn_relu_5 11,510.85 8.47 float32[1, 128, 28, 28], float32[128, 128, 3, 3], float32[128, 1, 1], float32[128, 1, 1], float32[1, 128, 28, 28] 1e2717b5beb2fa67 NCHW cpu0 OIHW 4 fused_nn_conv2d_multiply_add_nn_relu_9 8,864.91 6.53 float32[1, 1024, 14, 14], float32[256, 1024, 1, 1], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 14, 14] d13eb3b00a8d5f35 NCHW cpu0 OIHW 5 fused_nn_conv2d_multiply_add_nn_relu_2 8,638.60 6.36 float32[1, 64, 56, 56], float32[64, 64, 3, 3], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] 9e6b01a1c3c8a068 NCHW cpu0 OIHW 3 fused_nn_conv2d_add_2 8,204.04 6.04 float32[1, 256, 14, 14], float32[1024, 256, 1, 1], float32[1, 1024, 14, 14], float32[1, 1024, 14, 14] 5220d30314ead0f1 NCHW cpu0 OIHW 5 fused_nn_conv2d_add_1 5,331.53 3.92 float32[1, 128, 28, 28], float32[512, 128, 1, 1], float32[1, 512, 28, 28], float32[1, 512, 28, 28] 0bf103915aebe126 NCHW cpu0 OIHW 3 fused_nn_conv2d_3 5,018.85 3.69 float32[1, 1024, 14, 14], float32[2048, 1024, 1, 1], float32[1, 2048, 7, 7] 6d6eb730bfedd923 NCHW cpu0 OIHW 1 fused_nn_conv2d_multiply_add_nn_relu_6 4,782.81 3.52 float32[1, 512, 28, 28], float32[128, 512, 1, 1], float32[128, 1, 1], float32[128, 1, 1], float32[1, 128, 28, 28] 4e037f410da9f71b NCHW cpu0 OIHW 3 fused_nn_conv2d_add 
4,655.16 3.43 float32[1, 64, 56, 56], float32[256, 64, 1, 1], float32[1, 256, 56, 56], float32[1, 256, 56, 56] 0579ca31a5deb349 NCHW cpu0 OIHW 2 fused_nn_conv2d_multiply_add_nn_relu_12 4,458.45 3.28 float32[1, 2048, 7, 7], float32[512, 2048, 1, 1], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 7, 7] 910c4036cd67e89a NCHW cpu0 OIHW 2 fused_nn_conv2d_add_3 4,426.89 3.26 float32[1, 512, 7, 7], float32[2048, 512, 1, 1], float32[1, 2048, 7, 7], float32[1, 2048, 7, 7] 49d9928cdfecc5cf NCHW cpu0 OIHW 2 fused_nn_conv2d_multiply_add_nn_relu_3 4,271.83 3.14 float32[1, 256, 56, 56], float32[64, 256, 1, 1], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] 811cb902928c44b7 NCHW cpu0 OIHW 2 fused_nn_conv2d_1 3,825.73 2.82 float32[1, 256, 56, 56], float32[512, 256, 1, 1], float32[1, 512, 28, 28] 1a74ea9d21fe242b NCHW cpu0 OIHW 1 fused_nn_conv2d_multiply_add_nn_relu 3,396.77 2.50 float32[1, 3, 224, 224], float32[64, 3, 7, 7], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 112, 112] dbd095d1f70608d4 NCHW cpu0 OIHW 1 fused_nn_conv2d_2 3,289.00 2.42 float32[1, 512, 28, 28], float32[1024, 512, 1, 1], float32[1, 1024, 14, 14] d06d46290b62d7fe NCHW cpu0 OIHW 1 fused_nn_conv2d_add_multiply_add_nn_relu 2,393.57 1.76 float32[1, 64, 56, 56], float32[256, 64, 1, 1], float32[1, 256, 56, 56], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 56, 56] ac4d220a67fcb1ee NCHW cpu0 OIHW 1 fused_nn_conv2d_add_multiply_add_nn_relu_3 2,082.21 1.53 float32[1, 512, 7, 7], float32[2048, 512, 1, 1], float32[1, 2048, 7, 7], float32[2048, 1, 1], float32[2048, 1, 1], float32[1, 2048, 7, 7] a44fe9b9e8d2da07 NCHW cpu0 OIHW 1 fused_nn_dense_nn_bias_add 2,008.92 1.48 float32[1, 2048], float32[1000, 2048], float32[1000], float32[1, 1000] d7434de44c54529a cpu0 1 fused_nn_conv2d 1,972.36 1.45 float32[1, 64, 56, 56], float32[256, 64, 1, 1], float32[1, 256, 56, 56] 336879824a51f323 NCHW cpu0 OIHW 1 fused_nn_conv2d_add_multiply_add_nn_relu_1 1,782.06 1.31 float32[1, 128, 28, 28], 
float32[512, 128, 1, 1], float32[1, 512, 28, 28], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 28, 28] 94d9f51ec760c01b NCHW cpu0 OIHW 1 fused_nn_conv2d_multiply_add_nn_relu_4 1,631.71 1.20 float32[1, 256, 56, 56], float32[128, 256, 1, 1], float32[128, 1, 1], float32[128, 1, 1], float32[1, 128, 28, 28] 85514dd90f0099e4 NCHW cpu0 OIHW 1 fused_nn_conv2d_add_multiply_add_nn_relu_2 1,623.09 1.19 float32[1, 256, 14, 14], float32[1024, 256, 1, 1], float32[1, 1024, 14, 14], float32[1024, 1, 1], float32[1024, 1, 1], float32[1, 1024, 14, 14] ff2ba4afc4b00ccf NCHW cpu0 OIHW 1 fused_multiply_add_nn_relu 1,390.58 1.02 float32[1, 256, 56, 56], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 56, 56] 9a48c23d6d41bd2f cpu0 2 fused_nn_conv2d_multiply_add_nn_relu_10 1,231.09 0.91 float32[1, 1024, 14, 14], float32[512, 1024, 1, 1], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 7, 7] 5506cbf207ccd131 NCHW cpu0 OIHW 1 fused_nn_conv2d_multiply_add_nn_relu_7 982.67 0.72 float32[1, 512, 28, 28], float32[256, 512, 1, 1], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 14, 14] 341159a3c2f00cae NCHW cpu0 OIHW 1 fused_multiply_add_nn_relu_1 873.20 0.64 float32[1, 512, 28, 28], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 28, 28] 004373fd83ff4e02 cpu0 3 fused_nn_max_pool2d_multiply_add_nn_relu 870.70 0.64 float32[1, 64, 112, 112], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] NCHW c55e51cdf27573bb cpu0 1 fused_multiply_add_nn_relu_2 554.39 0.41 float32[1, 1024, 14, 14], float32[1024, 1, 1], float32[1024, 1, 1], float32[1, 1024, 14, 14] 9eb2396efef0e312 cpu0 5 fused_nn_conv2d_multiply_add_nn_relu_1 530.49 0.39 float32[1, 64, 56, 56], float32[64, 64, 1, 1], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] 482f6fc9ff278c3f NCHW cpu0 OIHW 1 fused_multiply_add_nn_relu_3 109.80 0.08 float32[1, 2048, 7, 7], float32[2048, 1, 1], float32[2048, 1, 1], float32[1, 2048, 7, 7] b74dcecdab14a995 cpu0 2 fused_multiply_add 97.47 0.07 
float32[1, 3, 224, 224], float32[3, 1, 1], float32[3, 1, 1], float32[1, 3, 224, 224] 65ced11c4ebbde8f cpu0 1 fused_nn_global_avg_pool2d 54.84 0.04 float32[1, 2048, 7, 7], float32[1, 2048, 1, 1] NCHW 9589c5c75edc8cf4 cpu0 1 fused_nn_softmax 10.39 0.01 float32[1, 1000], float32[1, 1000] ca61e79ea24e53f0 cpu0 1 fused_nn_batch_flatten 1.89 0.00 float32[1, 2048, 1, 1], float32[1, 2048] 8af63b18f42fefd8 cpu0 1 ---------- Sum 1,34,725.32 99.16 71 Total 1,35,860.04 cpu0 1 ``` (b) debug_executor ``` One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details. Name Duration (us) Percent Argument Shapes layout Hash data_layout out_layout Device kernel_layout Count tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_3 17,370.19 12.63 float32[1, 256, 14, 14], float32[256, 256, 3, 3], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 14, 14] 2cdb64071c823e24 NCHW cpu0 OIHW 6 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu 11,964.35 8.70 float32[1, 512, 7, 7], float32[512, 512, 3, 3], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 7, 7] ac5db8098bc41e35 NCHW cpu0 OIHW 3 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_11 11,740.92 8.54 float32[1, 64, 56, 56], float32[64, 64, 1, 1], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] 482f6fc9ff278c3f NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_6 11,105.73 8.08 float32[1, 128, 28, 28], float32[128, 128, 3, 3], float32[128, 1, 1], float32[128, 1, 1], float32[1, 128, 28, 28] 1e2717b5beb2fa67 NCHW cpu0 OIHW 4 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_9 8,865.77 6.45 float32[1, 64, 56, 56], float32[64, 64, 3, 3], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] 9e6b01a1c3c8a068 NCHW cpu0 OIHW 3 tvmgen_default_fused_nn_conv2d_add_multiply_add_nn_relu_3 8,005.04 5.82 float32[1, 64, 56, 56], float32[256, 64, 1, 1], float32[1, 256, 56, 56], float32[256, 1, 1], float32[256, 1, 1], float32[1, 
256, 56, 56] ac4d220a67fcb1ee NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_add_1 7,359.88 5.35 float32[1, 256, 14, 14], float32[1024, 256, 1, 1], float32[1, 1024, 14, 14], float32[1, 1024, 14, 14] 5220d30314ead0f1 NCHW cpu0 OIHW 5 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_4 7,026.80 5.11 float32[1, 1024, 14, 14], float32[256, 1024, 1, 1], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 14, 14] d13eb3b00a8d5f35 NCHW cpu0 OIHW 5 tvmgen_default_fused_nn_conv2d_add_2 4,732.52 3.44 float32[1, 128, 28, 28], float32[512, 128, 1, 1], float32[1, 512, 28, 28], float32[1, 512, 28, 28] 0bf103915aebe126 NCHW cpu0 OIHW 3 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_7 4,503.68 3.28 float32[1, 512, 28, 28], float32[128, 512, 1, 1], float32[128, 1, 1], float32[128, 1, 1], float32[1, 128, 28, 28] 4e037f410da9f71b NCHW cpu0 OIHW 3 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_10 4,475.41 3.25 float32[1, 256, 56, 56], float32[64, 256, 1, 1], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] 811cb902928c44b7 NCHW cpu0 OIHW 2 tvmgen_default_fused_nn_conv2d_add_3 4,407.82 3.21 float32[1, 64, 56, 56], float32[256, 64, 1, 1], float32[1, 256, 56, 56], float32[1, 256, 56, 56] 0579ca31a5deb349 NCHW cpu0 OIHW 2 tvmgen_default_fused_nn_conv2d_3 3,409.06 2.48 float32[1, 1024, 14, 14], float32[2048, 1024, 1, 1], float32[1, 2048, 7, 7] 6d6eb730bfedd923 NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_add 3,405.21 2.48 float32[1, 512, 7, 7], float32[2048, 512, 1, 1], float32[1, 2048, 7, 7], float32[1, 2048, 7, 7] 49d9928cdfecc5cf NCHW cpu0 OIHW 2 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_1 3,395.20 2.47 float32[1, 2048, 7, 7], float32[512, 2048, 1, 1], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 7, 7] 910c4036cd67e89a NCHW cpu0 OIHW 2 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_12 3,365.71 2.45 float32[1, 3, 224, 224], float32[64, 3, 7, 7], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 112, 112] dbd095d1f70608d4 NCHW 
cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_1 3,357.74 2.44 float32[1, 256, 56, 56], float32[512, 256, 1, 1], float32[1, 512, 28, 28] 1a74ea9d21fe242b NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_2 2,957.75 2.15 float32[1, 512, 28, 28], float32[1024, 512, 1, 1], float32[1, 1024, 14, 14] d06d46290b62d7fe NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d 1,823.12 1.33 float32[1, 64, 56, 56], float32[256, 64, 1, 1], float32[1, 256, 56, 56] 336879824a51f323 NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_add_multiply_add_nn_relu 1,656.22 1.20 float32[1, 512, 7, 7], float32[2048, 512, 1, 1], float32[1, 2048, 7, 7], float32[2048, 1, 1], float32[2048, 1, 1], float32[1, 2048, 7, 7] a44fe9b9e8d2da07 NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_add_multiply_add_nn_relu_2 1,604.78 1.17 float32[1, 128, 28, 28], float32[512, 128, 1, 1], float32[1, 512, 28, 28], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 28, 28] 94d9f51ec760c01b NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_dense_nn_bias_add 1,567.51 1.14 float32[1, 2048], float32[1000, 2048], float32[1000], float32[1, 1000] d7434de44c54529a cpu0 1 tvmgen_default_fused_nn_conv2d_add_multiply_add_nn_relu_1 1,559.91 1.13 float32[1, 256, 14, 14], float32[1024, 256, 1, 1], float32[1, 1024, 14, 14], float32[1024, 1, 1], float32[1024, 1, 1], float32[1, 1024, 14, 14] ff2ba4afc4b00ccf NCHW cpu0 OIHW 1 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_8 1,444.25 1.05 float32[1, 256, 56, 56], float32[128, 256, 1, 1], float32[128, 1, 1], float32[128, 1, 1], float32[1, 128, 28, 28] 85514dd90f0099e4 NCHW cpu0 OIHW 1 tvmgen_default_fused_multiply_add_nn_relu_3 1,433.78 1.04 float32[1, 256, 56, 56], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 56, 56] 9a48c23d6d41bd2f cpu0 2 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_2 869.02 0.63 float32[1, 1024, 14, 14], float32[512, 1024, 1, 1], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 7, 7] 5506cbf207ccd131 NCHW cpu0 OIHW 1 
tvmgen_default_fused_nn_max_pool2d_multiply_add_nn_relu 835.63 0.61 float32[1, 64, 112, 112], float32[64, 1, 1], float32[64, 1, 1], float32[1, 64, 56, 56] NCHW c55e51cdf27573bb cpu0 1 tvmgen_default_fused_multiply_add_nn_relu_2 786.12 0.57 float32[1, 512, 28, 28], float32[512, 1, 1], float32[512, 1, 1], float32[1, 512, 28, 28] 004373fd83ff4e02 cpu0 3 tvmgen_default_fused_nn_conv2d_multiply_add_nn_relu_5 784.15 0.57 float32[1, 512, 28, 28], float32[256, 512, 1, 1], float32[256, 1, 1], float32[256, 1, 1], float32[1, 256, 14, 14] 341159a3c2f00cae NCHW cpu0 OIHW 1 tvmgen_default_fused_multiply_add_nn_relu_1 428.09 0.31 float32[1, 1024, 14, 14], float32[1024, 1, 1], float32[1024, 1, 1], float32[1, 1024, 14, 14] 9eb2396efef0e312 cpu0 5 tvmgen_default_fused_multiply_add 334.36 0.24 float32[1, 3, 224, 224], float32[3, 1, 1], float32[3, 1, 1], float32[1, 3, 224, 224] 65ced11c4ebbde8f cpu0 1 tvmgen_default_fused_multiply_add_nn_relu 91.58 0.07 float32[1, 2048, 7, 7], float32[2048, 1, 1], float32[2048, 1, 1], float32[1, 2048, 7, 7] b74dcecdab14a995 cpu0 2 tvmgen_default_fused_nn_global_avg_pool2d 39.10 0.03 float32[1, 2048, 7, 7], float32[1, 2048, 1, 1] NCHW 9589c5c75edc8cf4 cpu0 1 tvmgen_default_fused_nn_softmax 9.62 0.01 float32[1, 1000], float32[1, 1000] ca61e79ea24e53f0 cpu0 1 tvmgen_default_fused_nn_batch_flatten 1.53 0.00 float32[1, 2048, 1, 1], float32[1, 2048] 8af63b18f42fefd8 cpu0 1 ---------- Sum 1,36,717.56 99.42 71 Total 1,37,508.67 cpu0 1 ``` (c) benchmark ``` One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details. Evaluate inference time cost... Execution time summary: mean (ms) median (ms) max (ms) min (ms) std (ms) 131.7054 131.8304 131.8502 131.4355 0.1910 ``` --- [Visit Topic](https://discuss.tvm.apache.org/t/difference-in-profiler-outputs/11255/8) to respond. You are receiving this because you enabled mailing list mode. 
To unsubscribe from these emails, [click here](https://discuss.tvm.apache.org/email/unsubscribe/2482718d578c39cab9ba1c70dc33a86401c18eb74b7c0d7d196aed98f43cd5ce).
[Apache TVM Discuss] [Questions] Difference in profiler outputs
Alan Nair via Apache TVM Discuss Wed, 27 Oct 2021 23:41:23 -0700
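- [Apache TVM Discuss] [Questions] Difference in profiler outputs — Alan Nair via Apache TVM Discuss
  - [Apache TVM Discuss] [Questions] Difference in profiler outputs — Tristan Konolige via Apache TVM Discuss
    - [Apache TVM Discuss] [Questions] Difference in profiler outputs — Alan Nair via Apache TVM Discuss
      - [Apache TVM Discuss] [Questions] Difference in profiler outputs — Tristan Konolige via Apache TVM Discuss
        - [Apache TVM Discuss] [Questions] Difference in profiler outputs — Alan Nair via Apache TVM Discuss
  - [Apache TVM Discuss] [Questions] Difference in profiler outputs — Alan Nair via Apache TVM Discuss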