Hi Experts, @Lianminzheng @jcf94, I tried to define a new operator for an LSTM network. The compute declaration for the LSTM op has been tested and is correct (a rough sketch of that check is included after the code below). Now I want to use auto-scheduling to automatically generate a large search space and find a good schedule in it, but the schedule cannot be generated successfully. My code is here:
```
from tvm import topi


def unbind_func(data):
    input_list = topi.split(data, indices_or_sections=data.shape[0].value, axis=0)
    input_sq_list = []
    for item in input_list:
        input_sq = topi.squeeze(item, axis=0)
        input_sq_list.append(input_sq)
    return input_sq_list


def lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype=None):
    """The default implementation of lstm_layer in topi.

    Parameters
    ----------
    data : tvm.te.Tensor
        3-D with shape [x, y, z]
    hx : tvm.te.Tensor
        2-D with shape [a, b]
    cx : tvm.te.Tensor
        2-D with shape [a, b]
    w_ih : tvm.te.Tensor
        2-D with shape
    w_hh : tvm.te.Tensor
        2-D with shape
    b_ih : tvm.te.Tensor
        1-D with shape
    b_hh : tvm.te.Tensor
        1-D with shape
    out_dtype : str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape
    hy : tvm.te.Tensor
        3-D with shape
    cy : tvm.te.Tensor
        3-D with shape
    """
    assert len(data.shape) == 3 and len(hx.shape) == 2 and len(cx.shape) == 2 and len(w_ih.shape) == 2 \
        and len(w_hh.shape) == 2 and len(b_ih.shape) == 1 and len(b_hh.shape) == 1, "only support 2-dim dense"
    if out_dtype is None:
        out_dtype = data.dtype

    # unbind the input data along the sequence dimension
    input_list = unbind_func(data)
    step_outputs = []
    for input in input_list:
        # input is a 2-D tensor for one time step
        linear_ih = topi.nn.dense(input, w_ih, b_ih)
        linear_hh = topi.nn.dense(hx, w_hh, b_hh)
        gates = topi.add(linear_ih, linear_hh)
        chunked_gates = topi.split(gates, indices_or_sections=4, axis=1)
        assert len(chunked_gates) == 4
        in_gate = topi.sigmoid(chunked_gates[0])
        forget_gate = topi.sigmoid(chunked_gates[1])
        cell_gate = topi.tanh(chunked_gates[2])
        out_gate = topi.sigmoid(chunked_gates[3])
        cy = topi.add(topi.multiply(forget_gate, cx), topi.multiply(in_gate, cell_gate))
        hy = topi.multiply(out_gate, topi.tanh(cy))
        step_outputs.append(hy)
        hx = hy
        cx = cy
    output = topi.stack(step_outputs, axis=0)
    return output
```

```
import tvm
from tvm import te, auto_scheduler, topi


@auto_scheduler.register_workload
def lstm_layers(hx, cx, w_ih, w_hh, b_ih, b_hh):
    data = te.placeholder((2, 1, 240), name="data")
    out = topi.nn.lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype="float32")
    return [data, hx, cx, w_ih, w_hh, b_ih, b_hh, out]


target = tvm.target.Target("cuda")

# the layer in lstm
hx = te.placeholder((1, 1024), name='hx')
cx = te.placeholder((1, 1024), name='cx')
w_ih = te.placeholder((4096, 240), name='w_ih')
w_hh = te.placeholder((4096, 1024), name='w_hh')
b_ih = te.placeholder((4096,), name='b_ih')
b_hh = te.placeholder((4096,), name='b_hh')

task = auto_scheduler.create_task(lstm_layers, (hx, cx, w_ih, w_hh, b_ih, b_hh), target)

# Inspect the computational graph
print(task.compute_dag)

measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=1,
    runner=measure_ctx.runner,
    measure_callbacks=[auto_scheduler.RecordToFile("lstm_layers.json")],
)
sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
print(tvm.lower(sch, list(args), simple_mode=True))
```

The schedule is generated successfully only when data = te.placeholder((1, 1, 240), name="data"). When data = te.placeholder((?, 1, 240), name="data") with ? > 1, the compute DAG can still be obtained and it prints "Get devices for measurement successfully!", but the schedule cannot be generated.
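For reference, this is roughly how I sanity-checked the compute declaration on CPU before trying the auto-scheduler. It is only a minimal sketch: it reuses the lstm_layer function defined above, the shapes match my test, and the comparison against a reference implementation is omitted.

```
# Minimal sketch of the correctness check for the lstm_layer compute declaration.
# Assumes the lstm_layer function from the first code block is in scope.
import numpy as np
import tvm
from tvm import te

seq_len, batch, in_dim, hidden = 2, 1, 240, 1024
data = te.placeholder((seq_len, batch, in_dim), name="data")
hx = te.placeholder((batch, hidden), name="hx")
cx = te.placeholder((batch, hidden), name="cx")
w_ih = te.placeholder((4 * hidden, in_dim), name="w_ih")
w_hh = te.placeholder((4 * hidden, hidden), name="w_hh")
b_ih = te.placeholder((4 * hidden,), name="b_ih")
b_hh = te.placeholder((4 * hidden,), name="b_hh")

out = lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype="float32")

# Naive default schedule, only to check that the compute declaration is correct.
s = te.create_schedule(out.op)
func = tvm.build(s, [data, hx, cx, w_ih, w_hh, b_ih, b_hh, out], "llvm")

ctx = tvm.cpu(0)
buffers = [
    tvm.nd.array(
        np.random.uniform(size=tuple(int(d) for d in t.shape)).astype("float32"), ctx
    )
    for t in (data, hx, cx, w_ih, w_hh, b_ih, b_hh, out)
]
func(*buffers)
# buffers[-1].asnumpy() is then compared against a reference implementation (not shown).
```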
The detailed error is as follows:

```
Traceback (most recent call last):
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/tutorials/auto_scheduler/tune_lstm_layers.py", line 109, in <module>
    sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/python/tvm/auto_scheduler/auto_schedule.py", line 213, in auto_schedule
    sch, tensors = _ffi_api.AutoSchedule(search_policy, tuning_options)
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
    raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
  [bt] (7) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(TVMFuncCall+0x61) [0x7fa9afe26ec1]
  [bt] (6) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xacaacd) [0x7fa9af1cdacd]
  [bt] (5) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::AutoSchedule(tvm::auto_scheduler::SearchPolicy, tvm::auto_scheduler::TuningOptions)+0x116) [0x7fa9af1cd1b6]
  [bt] (4) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::Search(int, int, int, tvm::auto_scheduler::ProgramMeasurer)+0xa82) [0x7fa9af262f52]
  [bt] (3) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::SearchOneRound(int, tvm::runtime::Array<tvm::auto_scheduler::State, void>*)+0x1c3) [0x7fa9af261f83]
  [bt] (2) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::SampleInitPopulation(tvm::runtime::Array<tvm::auto_scheduler::State, void> const&, int)+0x21e) [0x7fa9af25d39e]
  [bt] (1) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::support::parallel_for(int, int, std::function<void (int)> const&, int, std::function<std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > > (int, int, int, int)>)+0x1273) [0x7fa9af7fb413]
  [bt] (0) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x5f) [0x7fa9af1d171f]
  [bt] (8) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbd6df) [0x7fa9ab9976df]
  [bt] (7) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(std::thread::_State_impl<std::_Bind_simple<std::packaged_task<void (std::vector<int, std::allocator<int> > const&, std::function<void (int)> const&)> (std::vector<int, std::allocator<int> >, std::function<void (int)>)> >::_M_run()+0xd3) [0x7fa9af7fbb13]
  [bt] (6) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(void std::call_once<void (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*), std::__future_base::_State_baseV2*, std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&, void (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*), std::__future_base::_State_baseV2*&&, std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*&&, bool*&&)+0x71) [0x7fa9af7fba01]
  [bt] (5) /lib/x86_64-linux-gnu/libpthread.so.0(+0xf827) [0x7fa9d5c09827]
  [bt] (4) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*)+0x29) [0x7fa9af7fb8e9]
  [bt] (3) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0x10f6072) [0x7fa9af7f9072]
  [bt] (2) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xb5a0f2) [0x7fa9af25d0f2]
  [bt] (1) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::InitThreadBind::Apply(tvm::auto_scheduler::SketchPolicyNode*, tvm::auto_scheduler::State*, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>*) const+0x2f9) [0x7fa9af2743e9]
  [bt] (0) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xb671ff) [0x7fa9af26a1ff]
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/src/support/parallel_for.cc", line 92
TVMError: Parallel_for error with [21:43:57] /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/src/auto_scheduler/search_policy/sketch_policy_rules.cc:710: Check failed: HasCrossThreadReduction(*state, stage_id):
```
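As a possible workaround I am also considering registering only a single LSTM step (one cell) as the workload and looping over time steps outside the tuned function, since the failure only appears when the unrolled multi-step DAG is used. This is just a sketch of that direction; the lstm_cell name and shapes here are illustrative, and I have not verified that it avoids the check failure:

```
# Hypothetical single-step workload (untested); mirrors the script above.
import tvm
from tvm import te, auto_scheduler, topi


@auto_scheduler.register_workload
def lstm_cell(batch, in_dim, hidden):
    x = te.placeholder((batch, in_dim), name="x")
    hx = te.placeholder((batch, hidden), name="hx")
    cx = te.placeholder((batch, hidden), name="cx")
    w_ih = te.placeholder((4 * hidden, in_dim), name="w_ih")
    w_hh = te.placeholder((4 * hidden, hidden), name="w_hh")
    b_ih = te.placeholder((4 * hidden,), name="b_ih")
    b_hh = te.placeholder((4 * hidden,), name="b_hh")

    # one LSTM step: gates from the two dense ops, then the element-wise updates
    gates = topi.add(topi.nn.dense(x, w_ih, b_ih), topi.nn.dense(hx, w_hh, b_hh))
    i, f, g, o = topi.split(gates, indices_or_sections=4, axis=1)
    cy = topi.add(topi.multiply(topi.sigmoid(f), cx),
                  topi.multiply(topi.sigmoid(i), topi.tanh(g)))
    hy = topi.multiply(topi.sigmoid(o), topi.tanh(cy))
    return [x, hx, cx, w_ih, w_hh, b_ih, b_hh, hy, cy]


# task = auto_scheduler.create_task(lstm_cell, (1, 240, 1024), tvm.target.Target("cuda"))
```

Any pointers on why the HasCrossThreadReduction check fails for the multi-step DAG, or on the right way to auto-schedule this kind of LSTM workload, would be appreciated.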