cbalint13 commented on code in PR #18528: URL: https://github.com/apache/tvm/pull/18528#discussion_r2587970549
########## tests/python/meta_schedule/test_meta_schedule_mma_tensorize.py: ##########
@@ -0,0 +1,319 @@
+import tvm
+import numpy as np
+from tvm.script import tir as T
+from tvm.tir.schedule import Schedule
+import tvm.tir.tensor_intrin  # pylint: disable=unused-import
+import tvm.testing
+import torch
+
+import pytest
+
+M, N, K = 4096, 4096, 4096
+np.random.seed(0)
+
+
[email protected]_module
+class Gemm_F16F16F16:
+    # fmt: off
+    @T.prim_func
+    def main(
+        A: T.Buffer((M, K), "float16"),  # type: ignore
+        B: T.Buffer((K, N), "float16"),  # type: ignore
+        C: T.Buffer((M, N), "float16"),  # type: ignore
+    ):
+        for i, j, k in T.grid(M, N, K):
+            with T.block("C"):
+                vi, vj, vk = T.axis.remap("SSR", [i, j, k])
+                with T.init():
+                    C[vi, vj] = T.float32(0)
+                C[vi, vj] = C[vi, vj] + A[vi, vk] * B[vk, vj]
+
+
[email protected]_module
+class Gemm_F16F16F32:
+    # fmt: off
+    @T.prim_func
+    def main(
+        A: T.Buffer((M, K), "float16"),  # type: ignore
+        B: T.Buffer((K, N), "float16"),  # type: ignore
+        C: T.Buffer((M, N), "float32"),  # type: ignore
+    ):
+        for i, j, k in T.grid(M, N, K):
+            with T.block("C"):
+                vi, vj, vk = T.axis.remap("SSR", [i, j, k])
+                with T.init():
+                    C[vi, vj] = T.float32(0)
+                C[vi, vj] = C[vi, vj] + T.cast(A[vi, vk], "float32") * T.cast(B[vk, vj], "float32")
+
+
[email protected]_cuda
+def test_run_target(mod=None, tgt_str=None, in_dtype="float16", out_dtype="float16"):
+    if mod is None:
+        return
+    tgt_str = tgt_str or "cuda"
+    target = tvm.target.Target(target=tgt_str)
+    with tvm.transform.PassContext(opt_level=3):
+        lib: tvm.runtime.Module = tvm.compile(mod, target=target)
+
+    dev = tvm.device(tgt_str, 0)
+    a_np = np.random.rand(M, K).astype(in_dtype)
+    b_np = np.random.rand(K, N).astype(in_dtype)
+    c_np = np.ones((M, N), dtype=out_dtype)
+    a = tvm.runtime.tensor(a_np, dev)
+    b = tvm.runtime.tensor(b_np, dev)
+    c = tvm.runtime.tensor(c_np, dev)
+
+    f = lib["main"]
+    f(a, b, c)
+
+    c_th = torch.matmul(
+        torch.tensor(a_np).to(tgt_str), torch.tensor(b_np).to(tgt_str)
+    ).to(torch.float32 if out_dtype == "float32" else torch.float16)
+    c_f = torch.tensor(c.numpy()).to(tgt_str)
+    torch.allclose(c_th, c_f, rtol=0.05, atol=0.05)

Review Comment:
   Currently, external torch invocation is not supported, so I would suggest we stick to numpy for this test. Also, see the CI failure: "E ModuleNotFoundError: No module named 'torch'"
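   For illustration, a numpy-only version of the final comparison might look like the sketch below. It is only a suggestion, reusing the `a_np`, `b_np`, `c`, and `out_dtype` names already defined in the test body; note that it also actually asserts the result, whereas the bare `torch.allclose(...)` call above discards its return value, so the test as written can never fail on a mismatch.

   ```python
   # Sketch of a numpy-only reference check (names a_np, b_np, c, out_dtype
   # come from the test body above). Accumulate the reference matmul in
   # float32, then cast to the output dtype before comparing.
   c_ref = (a_np.astype("float32") @ b_np.astype("float32")).astype(out_dtype)
   np.testing.assert_allclose(c.numpy(), c_ref, rtol=0.05, atol=0.05)
   ```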
