Hi everyone,

I'm currently trying to tensorize the schedule for a very simple  [4,4] matrix 
element-wise sum (add) to be performed in 4 [2,2] matrix addition steps by an 
intrinsic function. I've looked into adapting the tutorial on 
[Tensorization](https://tvm.apache.org/docs/tutorials/language/tensorize.html) 
but I cannot get a schedule that compiles as I think I'm not correctly creating 
buffers (the code gets stuck in the StorageFlattener step). Currently I came up 
with this schedule, but it doesn't compile (python source code below).:
```
primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
  attr = {"global_symbol": "main", "tir.noalias": True}
  buffers = {C: Buffer(C_2: Pointer(float32), float32, [4, 4], []),
             B: Buffer(B_2: Pointer(float32), float32, [4, 4], []),
             A: Buffer(A_2: Pointer(float32), float32, [4, 4], [])}
  buffer_map = {A_1: A, B_1: B, C_1: C} {
  attr [C] "realize_scope" = "";
  realize(C, [0:4, 0:4], True {
    for (i.outer: int32, 0, 2) {
      for (j.outer: int32, 0, 2) {
        attr [[A_3: Buffer(A_4: Pointer(float32), float32, [2, 2], [2, 1], 
elem_offset=A_elem_offset: int32), A]] "buffer_bind_scope" = 
@tir.tvm_tuple((i.outer*2), 2, (j.outer*2), 2, dtype=handle);
        attr [[B_3: Buffer(B_4: Pointer(float32), float32, [2, 2], [2, 1], 
elem_offset=B_elem_offset: int32), B]] "buffer_bind_scope" = 
@tir.tvm_tuple((i.outer*2), 2, (j.outer*2), 2, dtype=handle);
        attr [[C_3: Buffer(C_4: Pointer(float32), float32, [2, 2], [2, 1], 
elem_offset=C_elem_offset: int32), C]] "buffer_bind_scope" = 
@tir.tvm_tuple((i.outer*2), 2, (j.outer*2), 2, dtype=handle);
        @tir.call_extern("ews", 
                @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_4, 
C_elem_offset, 4, 2, dtype=handle), 
                @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_4, 
A_elem_offset, 4, 1, dtype=handle), 
                @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_4, 
B_elem_offset, 4, 1, dtype=handle), 
            2, 
            2, 
            2, 
            dtype=float32)
      }
    }
  })
}
```
Python source:
```
from __future__ import absolute_import, print_function

import tvm
from tvm import te


def intrin_ews(ro,co,data_type):
    a = te.placeholder((ro,co), dtype=data_type, name="a")
    b = te.placeholder((ro,co), dtype=data_type, name="b")
    c = te.compute((ro,co), lambda i,j: a[i,j] + b[i,j], name="c")

    # Preview a generic schedule
    #preview = te.create_schedule(c.op)
    #print(tvm.lower(preview, [a, b, c], simple_mode=True))

    # Define buffers
    # Offset factor --> optimize for vectorized buffering
    Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, 
strides=[2,1])
    Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1, 
strides=[2,1])
    Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, 
strides=[2,1])

    def intrin_func(ins, outs):
        # create IR builder
        ib = tvm.tir.ir_builder.create()
        aa, bb = ins
        cc = outs[0]
        ib.emit(
            tvm.tir.call_extern(
                "float32",
                "ews",
                cc.access_ptr("w"),
                aa.access_ptr("r"),
                bb.access_ptr("r"),
                ro,
                co,
                bb.strides[0],
            )
        )
        return ib.get()

    return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})


rows = 2
cols = 2
data_type = "float32"
# Create an instance
intrinsic = intrin_ews(rows,cols,data_type)

ro = 4
co = 4
# Create a tensorizable schedule
A = te.placeholder((ro,co), dtype=data_type, name="A")
B = te.placeholder((ro,co), dtype=data_type, name="B")
C = te.compute((ro,co), lambda i,j: A[i,j] + B[i,j], name="C")
# Create a vanilla schedule
s = te.create_schedule(C.op)
xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1],x_factor=2,y_factor=2)
print(tvm.lower(s, [A, B, C], simple_mode=True))
# Get a handle to the axis
# x, y = s[C].op.axis
# Tensorize!
s[C].tensorize(xi, intrinsic)
print(tvm.lower(s, [A, B, C], simple_mode=True))
```

The stack trace of the error:
```
tvm._ffi.base.TVMError: Traceback (most recent call last):
  [bt] (8) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::NodeFunctor<tvm::tir::Stmt
 (tvm::runtime::ObjectRef const&, tvm::tir::StmtFunctor<tvm::tir::Stmt 
(tvm::tir::Stmt const&)>*)>::operator()(tvm::runtime::ObjectRef const&, 
tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>*) const+0x11d) 
[0x7f9e3176df4d]
  [bt] (7) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::StmtFunctor<tvm::tir::Stmt
 (tvm::tir::Stmt const&)>::InitVTable()::{lambda(tvm::runtime::ObjectRef 
const&, tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt 
const&)>*)#2}::_FUN(tvm::runtime::ObjectRef const&, 
tvm::tir::StmtFunctor<tvm::tir::Stmt (tvm::tir::Stmt const&)>*)+0x26) 
[0x7f9e31766516]
  [bt] (6) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::StorageFlattener::VisitStmt_(tvm::tir::AttrStmtNode
 const*)+0x333) [0x7f9e31d09193]
  [bt] (5) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::StorageFlattener::HandleBufferBindScope(tvm::tir::AttrStmtNode
 const*)+0xbaf) [0x7f9e31d02f1f]
  [bt] (4) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::ArgBinder::BindBuffer(tvm::tir::Buffer
 const&, tvm::tir::Buffer const&, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, bool)+0xb6a) 
[0x7f9e31c3d8aa]
  [bt] (3) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::ArgBinder::BindArray(tvm::runtime::Array<tvm::PrimExpr,
 void> const&, tvm::runtime::Array<tvm::PrimExpr, void> const&, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)+0x478) [0x7f9e31c3cb58]
  [bt] (2) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::ArgBinder::Bind_(tvm::PrimExpr
 const&, tvm::PrimExpr const&, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, bool)+0x24c) 
[0x7f9e31c3c06c]
  [bt] (1) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::tir::BinderAddAssert(tvm::arith::Analyzer*,
 tvm::PrimExpr, std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> > const&, std::vector<tvm::tir::Stmt, 
std::allocator<tvm::tir::Stmt> >*)+0xe0) [0x7f9e31c3ba90]
  [bt] (0) 
/home/josse/.pyenv/versions/3.7.7/envs/tvm-sirius/lib/python3.7/site-packages/tvm-0.8.dev609+g57c467fed-py3.7-linux-x86_64.egg/tvm/libtvm.so(+0xad88a6)
 [0x7f9e31c3b8a6]
  File "/home/josse/Thesis/tvm-fork/tvm-fork/src/tir/transforms/arg_binder.cc", 
line 40
TVMError: Bind have an unmet assertion: (bool)0,  on argument A.strides[0]
```

I think I'm not correctly using the "buffer_bind_scope" instruction here.
I'm not surehow I should make it work, any solutions are comments are very much 
appreciated! 
If someone can explain me or give me some pointers on  how the buffer bind 
scope works that would be great!
Thanks!





---
[Visit 
Topic](https://discuss.tvm.apache.org/t/te-tensorize-elementwise-sum/9335/1) to 
respond.

You are receiving this because you enabled mailing list mode.

To unsubscribe from these emails, [click 
here](https://discuss.tvm.apache.org/email/unsubscribe/a3d5e28716962a6e1d9e252d9cee8d1f9b7f0e670fb06a85ceb1752e956370b1).

Reply via email to