Hi Experts, @Lianminzheng @jcf94, I tried to define a new operator for an LSTM network. The compute declaration for the LSTM op has been tested and is correct (a rough sketch of that check is included after the code below). Now I want to use auto-scheduling to automatically generate a large search space and find a good schedule in it, but the schedule cannot be generated successfully. My code is here:
```
from tvm import topi


def unbind_func(data):
    input_list = topi.split(data, indices_or_sections=data.shape[0].value, axis=0)
    input_sq_list = []
    for item in input_list:
        input_sq = topi.squeeze(item, axis=0)
        input_sq_list.append(input_sq)
    return input_sq_list


def lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype=None):
    """The default implementation of lstm_layer in topi.

    Parameters
    ----------
    data : tvm.te.Tensor
        3-D with shape [x, y, z]
    hx : tvm.te.Tensor
        2-D with shape [a, b]
    cx : tvm.te.Tensor
        2-D with shape [a, b]
    w_ih : tvm.te.Tensor
        2-D with shape
    w_hh : tvm.te.Tensor
        2-D with shape
    b_ih : tvm.te.Tensor
        1-D with shape
    b_hh : tvm.te.Tensor
        1-D with shape
    out_dtype : str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape
    hy : tvm.te.Tensor
        3-D with shape
    cy : tvm.te.Tensor
        3-D with shape
    """
    assert len(data.shape) == 3 and len(hx.shape) == 2 and len(cx.shape) == 2 and len(w_ih.shape) == 2 \
        and len(w_hh.shape) == 2 and len(b_ih.shape) == 1 and len(b_hh.shape) == 1, "only support 2-dim dense"
    if out_dtype is None:
        out_dtype = data.dtype

    # unbind the input data along the sequence dimension
    input_list = unbind_func(data)
    step_outputs = []
    for input in input_list:
        # input is a 2-D tensor for one time step
        linear_ih = topi.nn.dense(input, w_ih, b_ih)
        linear_hh = topi.nn.dense(hx, w_hh, b_hh)
        gates = topi.add(linear_ih, linear_hh)
        chunked_gates = topi.split(gates, indices_or_sections=4, axis=1)
        assert len(chunked_gates) == 4
        in_gate = topi.sigmoid(chunked_gates[0])
        forget_gate = topi.sigmoid(chunked_gates[1])
        cell_gate = topi.tanh(chunked_gates[2])
        out_gate = topi.sigmoid(chunked_gates[3])
        cy = topi.add(topi.multiply(forget_gate, cx), topi.multiply(in_gate, cell_gate))
        hy = topi.multiply(out_gate, topi.tanh(cy))
        step_outputs.append(hy)
        hx = hy
        cx = cy
    output = topi.stack(step_outputs, axis=0)
    return output
```

```
import tvm
from tvm import te, auto_scheduler, topi


@auto_scheduler.register_workload
def lstm_layers(hx, cx, w_ih, w_hh, b_ih, b_hh):
    data = te.placeholder((2, 1, 240), name="data")
    out = topi.nn.lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype="float32")
    return [data, hx, cx, w_ih, w_hh, b_ih, b_hh, out]


target = tvm.target.Target("cuda")

# the layer in lstm
hx = te.placeholder((1, 1024), name='hx')
cx = te.placeholder((1, 1024), name='cx')
w_ih = te.placeholder((4096, 240), name='w_ih')
w_hh = te.placeholder((4096, 1024), name='w_hh')
b_ih = te.placeholder((4096,), name='b_ih')
b_hh = te.placeholder((4096,), name='b_hh')

task = auto_scheduler.create_task(lstm_layers, (hx, cx, w_ih, w_hh, b_ih, b_hh), target)

# Inspect the computational graph
print(task.compute_dag)

measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=1,
    runner=measure_ctx.runner,
    measure_callbacks=[auto_scheduler.RecordToFile("lstm_layers.json")],
)
sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
print(tvm.lower(sch, list(args), simple_mode=True))
```

The schedule is generated successfully only when data = te.placeholder((1, 1, 240), name="data"). When data = te.placeholder((?, 1, 240), name="data") with ? > 1, the compute DAG can still be obtained and it prints "Get devices for measurement successfully!", but the schedule cannot be generated.
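For reference, this is roughly how I sanity-checked the compute declaration on CPU before trying the auto-scheduler. It is only a minimal sketch: it reuses the lstm_layer function defined above, the shapes match my test, and the comparison against a reference implementation is omitted.

```
# Minimal sketch of the correctness check for the lstm_layer compute declaration.
# Assumes the lstm_layer function from the first code block is in scope.
import numpy as np
import tvm
from tvm import te

seq_len, batch, in_dim, hidden = 2, 1, 240, 1024
data = te.placeholder((seq_len, batch, in_dim), name="data")
hx = te.placeholder((batch, hidden), name="hx")
cx = te.placeholder((batch, hidden), name="cx")
w_ih = te.placeholder((4 * hidden, in_dim), name="w_ih")
w_hh = te.placeholder((4 * hidden, hidden), name="w_hh")
b_ih = te.placeholder((4 * hidden,), name="b_ih")
b_hh = te.placeholder((4 * hidden,), name="b_hh")

out = lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype="float32")

# Naive default schedule, only to check that the compute declaration is correct.
s = te.create_schedule(out.op)
func = tvm.build(s, [data, hx, cx, w_ih, w_hh, b_ih, b_hh, out], "llvm")

ctx = tvm.cpu(0)
buffers = [
    tvm.nd.array(
        np.random.uniform(size=tuple(int(d) for d in t.shape)).astype("float32"), ctx
    )
    for t in (data, hx, cx, w_ih, w_hh, b_ih, b_hh, out)
]
func(*buffers)
# buffers[-1].asnumpy() is then compared against a reference implementation (not shown).
```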
The detailed error is as follows:

```
Traceback (most recent call last):
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/tutorials/auto_scheduler/tune_lstm_layers.py", line 109, in <module>
    sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/python/tvm/auto_scheduler/auto_schedule.py", line 213, in auto_schedule
    sch, tensors = _ffi_api.AutoSchedule(search_policy, tuning_options)
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
    raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
  [bt] (7) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(TVMFuncCall+0x61) [0x7fa9afe26ec1]
  [bt] (6) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xacaacd) [0x7fa9af1cdacd]
  [bt] (5) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::AutoSchedule(tvm::auto_scheduler::SearchPolicy, tvm::auto_scheduler::TuningOptions)+0x116) [0x7fa9af1cd1b6]
  [bt] (4) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::Search(int, int, int, tvm::auto_scheduler::ProgramMeasurer)+0xa82) [0x7fa9af262f52]
  [bt] (3) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::SearchOneRound(int, tvm::runtime::Array<tvm::auto_scheduler::State, void>*)+0x1c3) [0x7fa9af261f83]
  [bt] (2) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::SampleInitPopulation(tvm::runtime::Array<tvm::auto_scheduler::State, void> const&, int)+0x21e) [0x7fa9af25d39e]
  [bt] (1) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::support::parallel_for(int, int, std::function<void (int)> const&, int, std::function<std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > > (int, int, int, int)>)+0x1273) [0x7fa9af7fb413]
  [bt] (0) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x5f) [0x7fa9af1d171f]
  [bt] (8) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbd6df) [0x7fa9ab9976df]
  [bt] (7) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(std::thread::_State_impl<std::_Bind_simple<std::packaged_task<void (std::vector<int, std::allocator<int> > const&, std::function<void (int)> const&)> (std::vector<int, std::allocator<int> >, std::function<void (int)>)> >::_M_run()+0xd3) [0x7fa9af7fbb13]
  [bt] (6) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(void std::call_once<void (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*), std::__future_base::_State_baseV2*, std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&, void (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*), std::__future_base::_State_baseV2*&&, std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*&&, bool*&&)+0x71) [0x7fa9af7fba01]
  [bt] (5) /lib/x86_64-linux-gnu/libpthread.so.0(+0xf827) [0x7fa9d5c09827]
  [bt] (4) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*)+0x29) [0x7fa9af7fb8e9]
  [bt] (3) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0x10f6072) [0x7fa9af7f9072]
  [bt] (2) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xb5a0f2) [0x7fa9af25d0f2]
  [bt] (1) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::InitThreadBind::Apply(tvm::auto_scheduler::SketchPolicyNode*, tvm::auto_scheduler::State*, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>*) const+0x2f9) [0x7fa9af2743e9]
  [bt] (0) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xb671ff) [0x7fa9af26a1ff]
  File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/src/support/parallel_for.cc", line 92
TVMError: Parallel_for error with [21:43:57] /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/src/auto_scheduler/search_policy/sketch_policy_rules.cc:710: Check failed: HasCrossThreadReduction(*state, stage_id):
```
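As a possible workaround I am also considering registering only a single LSTM step (one cell) as the workload and looping over time steps outside the tuned function, since the failure only appears when the unrolled multi-step DAG is used. This is just a sketch of that direction; the lstm_cell name and shapes here are illustrative, and I have not verified that it avoids the check failure:

```
# Hypothetical single-step workload (untested); mirrors the script above.
import tvm
from tvm import te, auto_scheduler, topi


@auto_scheduler.register_workload
def lstm_cell(batch, in_dim, hidden):
    x = te.placeholder((batch, in_dim), name="x")
    hx = te.placeholder((batch, hidden), name="hx")
    cx = te.placeholder((batch, hidden), name="cx")
    w_ih = te.placeholder((4 * hidden, in_dim), name="w_ih")
    w_hh = te.placeholder((4 * hidden, hidden), name="w_hh")
    b_ih = te.placeholder((4 * hidden,), name="b_ih")
    b_hh = te.placeholder((4 * hidden,), name="b_hh")

    # one LSTM step: gates from the two dense ops, then the element-wise updates
    gates = topi.add(topi.nn.dense(x, w_ih, b_ih), topi.nn.dense(hx, w_hh, b_hh))
    i, f, g, o = topi.split(gates, indices_or_sections=4, axis=1)
    cy = topi.add(topi.multiply(topi.sigmoid(f), cx),
                  topi.multiply(topi.sigmoid(i), topi.tanh(g)))
    hy = topi.multiply(topi.sigmoid(o), topi.tanh(cy))
    return [x, hx, cx, w_ih, w_hh, b_ih, b_hh, hy, cy]


# task = auto_scheduler.create_task(lstm_cell, (1, 240, 1024), tvm.target.Target("cuda"))
```

Any pointers on why the HasCrossThreadReduction check fails for the multi-step DAG, or on the right way to auto-schedule this kind of LSTM workload, would be appreciated.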