Hi everyone,
I'm currently trying to tensorize the schedule for a very simple [4,4] matrix element-wise sum (add) to be performed in 4 [2,2] matrix addition steps by an intrinsic function. I've looked into adapting the tutorial on [Tensorization](https://tvm.apache.org/docs/tutorials/language/tensorize.html) but I cannot get a schedule that compiles as I think I'm not correctly creating buffers (the code gets stuck in the StorageFlattener step). Currently I came up with this schedule, but it doesn't compile (python source code below).: ``` primfn(A_1: handle, B_1: handle, C_1: handle) -> () attr = {"global_symbol": "main", "tir.noalias": True} buffers = {C: Buffer(C_2: Pointer(float32), float32, [4, 4], []), B: Buffer(B_2: Pointer(float32), float32, [4, 4], []), A: Buffer(A_2: Pointer(float32), float32, [4, 4], [])} buffer_map = {A_1: A, B_1: B, C_1: C} { attr [C] "realize_scope" = ""; realize(C, [0:4, 0:4], True { for (i.outer: int32, 0, 2) { for (j.outer: int32, 0, 2) { attr [[A_3: Buffer(A_4: Pointer(float32), float32, [2, 2], [2, 1], elem_offset=A_elem_offset: int32), A]] "buffer_bind_scope" = @tir.tvm_tuple((i.outer*2), 2, (j.outer*2), 2, dtype=handle); attr [[B_3: Buffer(B_4: Pointer(float32), float32, [2, 2], [2, 1], elem_offset=B_elem_offset: int32), B]] "buffer_bind_scope" = @tir.tvm_tuple((i.outer*2), 2, (j.outer*2), 2, dtype=handle); attr [[C_3: Buffer(C_4: Pointer(float32), float32, [2, 2], [2, 1], elem_offset=C_elem_offset: int32), C]] "buffer_bind_scope" = @tir.tvm_tuple((i.outer*2), 2, (j.outer*2), 2, dtype=handle); @tir.call_extern("ews", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_4, C_elem_offset, 4, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_4, A_elem_offset, 4, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_4, B_elem_offset, 4, 1, dtype=handle), 2, 2, 2, dtype=float32) } } }) } ``` Python source: ``` from __future__ import absolute_import, print_function import tvm from tvm 
import te def intrin_ews(ro,co,data_type): a = te.placeholder((ro,co), dtype=data_type, name="a") b = te.placeholder((ro,co), dtype=data_type, name="b") c = te.compute((ro,co), lambda i,j: a[i,j] + b[i,j], name="c") # Preview a generic schedule #preview = te.create_schedule(c.op) #print(tvm.lower(preview, [a, b, c], simple_mode=True)) # Define buffers # Offset factor --> optimize for vectorized buffering Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, strides=[2,1]) Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1, strides=[2,1]) Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, strides=[2,1]) def intrin_func(ins, outs): # create IR builder ib = tvm.tir.ir_builder.create() aa, bb = ins cc = outs[0] ib.emit( tvm.tir.call_extern( "float32", "ews", cc.access_ptr("w"), aa.access_ptr("r"), bb.access_ptr("r"), ro, co, bb.strides[0], ) ) return ib.get() return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb}) rows = 2 cols = 2 data_type = "float32" # Create an instance intrinsic = intrin_ews(rows,cols,data_type) ro = 4 co = 4 # Create a tensorizable schedule A = te.placeholder((ro,co), dtype=data_type, name="A") B = te.placeholder((ro,co), dtype=data_type, name="B") C = te.compute((ro,co), lambda i,j: A[i,j] + B[i,j], name="C") # Create a vanilla schedule s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1],x_factor=2,y_factor=2) print(tvm.lower(s, [A, B, C], simple_mode=True)) # Get a handle to the axis # x, y = s[C].op.axis # Tensorize! 
s[C].tensorize(xi, intrinsic) print(tvm.lower(s, [A, B, C], simple_mode=True)) ``` The stack trace of the error: ``` tvm._ffi.base.TVMError: Traceback (most recent call last): [bt] (8) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::NodeFunctor<tvm::tir::Stmt (tvm::runtime::ObjectRef const&, tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>*)>::operator()(tvm::runtime::ObjectRef const&, tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>*) const+0x11d) [0x7f9e3176df4d] [bt] (7) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>::InitVTable()::{lambda(tvm::runtime::ObjectRef const&, tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>*)#2}::_FUN(tvm::runtime::ObjectRef const&, tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>*)+0x26) [0x7f9e31766516] [bt] (6) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::StorageFlattener::VisitStmt_(tvm::tir::AttrStmtNode const*)+0x333) [0x7f9e31d09193] [bt] (5) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::StorageFlattener::HandleBufferBindScope(tvm::tir::AttrStmtNode const*)+0xbaf) [0x7f9e31d02f1f] [bt] (4) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::ArgBinder::BindBuffer(tvm::tir::Buffer const&, tvm::tir::Buffer const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)+0xb6a) [0x7f9e31c3d8aa] [bt] (3) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::ArgBinder::BindArray(tvm::runtime::Array<tvm::PrimExpr, void> const&, tvm::runtime::Array<tvm::PrimExpr, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x478) [0x7f9e31c3cb58] [bt] (2) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::ArgBinder::Bind_(tvm::PrimExpr const&, tvm::PrimExpr const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)+0x24c) [0x7f9e31c3c06c] [bt] (1) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::BinderAddAssert(tvm::arith::Analyzer*, tvm::PrimExpr, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<tvm::tir::Stmt, std::allocator<tvm::tir::Stmt> >*)+0xe0) [0x7f9e31c3ba90] [bt] (0) /home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(+0xad88a6) [0x7f9e31c3b8a6] File "/home/josse/Thesis/tvm-fork/tvm-fork/src/tir/transforms/arg_binder.cc", line 40 TVMError: Bind have an unmet assertion: (bool)0, on argument A.strides[0] ``` I think I'm not correctly using the "buffer_bind_scope" instruction here. I'm not sure how I should make it work; any solutions or comments are very much appreciated! If someone can explain to me or give me some pointers on how the buffer bind scope works that would be great! Thanks! --- [Visit Topic](https://discuss.tvm.apache.org/t/te-tensorize-elementwise-sum/9335/1) to respond. You are receiving this because you enabled mailing list mode. 
To unsubscribe from these emails, [click here](https://discuss.tvm.apache.org/email/unsubscribe/a3d5e28716962a6e1d9e252d9cee8d1f9b7f0e670fb06a85ceb1752e956370b1).