Hi Experts, @tqchen, @kazum
I tried to auto-tune the sample ResNet model for the iOS Metal target, following [this thread](https://discuss.tvm.apache.org/t/auto-tvm-how-to-auto-tune-the-model-on-ios-device/7681). While tuning the model I always see **"Current/Best: 0.00/ 0.00 GFLOPS"** in the logs, yet the run reports "Test Succeeded". I tuned for a while (`n_trial=100`) and compared the results before and after tuning, but I did not see any improvement. Also, when picking the best records from the cache file (`autotvm.record.pick_best(tmp_log_file, log_filename)`), `log_filename` ends up with zero bytes, i.e. no records were picked from the tuned log file (a sketch for inspecting the temporary log is included after the code below).

Results before and after tuning the ResNet model:

* With tuning: mean inference time 54.05 ms, std dev 1.27 ms
* Without tuning: mean inference time 51.07 ms, std dev 0.14 ms

**Code:**

```python
import os

import numpy as np

import tvm
from tvm import relay, autotvm
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.contrib import graph_runtime as runtime
from tvm.contrib import xcode

target = 'metal'
proxy_port = 9090
key = "iphone"
arch = "arm64"
sdk = "iphoneos"
target_host = "llvm -mtriple=%s-apple-darwin" % arch

@tvm.register_func("tvm_callback_metal_compile")
def compile_metal(src):
    return xcode.compile_metal(src, sdk=sdk)

#### TUNING OPTION ####
network = 'resnet-18'
log_file = "%s.log" % network
dtype = 'float32'

autotvm.measure.measure_methods.check_remote = lambda *args: True

def fcompile(*args):
    from tvm.contrib import xcode
    xcode.create_dylib(*args, arch=arch, sdk=sdk)
    path = args[0]
    xcode.codesign(path)
    # xcode.popen_test_rpc(proxy_host, proxy_port, key,
    #                      destination=destination,
    #                      libs=[path])

fcompile.output_format = "dylib"

tuning_option = {
    'log_filename': log_file,
    'tuner': 'xgb',
    'early_stopping': None,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(
            n_parallel=1,
            build_func=fcompile,
            timeout=60
        ),
        runner=autotvm.RPCRunner(
            key, host='127.0.0.1', port=9190,
            number=20, repeat=3, timeout=60,
            min_repeat_ms=150)
    ),
}

def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=100,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=False):
    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)

########################################################################
# Finally, we launch tuning jobs and evaluate the end-to-end performance.
```
```python
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    # get_network is the helper from the auto-tuning tutorial (not shown here)
    mod, params, input_shape, out_shape = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(mod["main"], target=target,
                                              params=params,
                                              target_host=target_host,
                                              ops=(relay.op.get("nn.conv2d"),))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with the history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

        # export library
        path_dso = "tuned_deploy.dylib"
        lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk)
        xcode.codesign(path_dso)

        # evaluate inference cost on the tuned lib
        xcode.popen_test_rpc(proxy_host, proxy_port, device_key,
                             destination=destination, libs=[path_dso])

        remote = autotvm.measure.request_remote(device_key, '0.0.0.0', 9190,
                                                timeout=10000)
        # upload not needed for iOS because the dylib is built into the app
        # remote.upload(path_dso)
        rlib = remote.load_module(path_dso)

        ctx = remote.metal(0)
        module = runtime.create(graph, rlib, ctx)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=3, repeat=20)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
```

Any idea what is happening here and how to fix it? Can you please help me understand and resolve this issue? Thanks.
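As mentioned above, here is a minimal sketch for inspecting the temporary tuning log with `autotvm.record.load_from_file`. The `tmp_log_file` path is an assumption based on my `log_file = "resnet-18.log"` plus the `.tmp` suffix used in `tune_tasks`. If every record has a non-zero `error_no`, that would be consistent with the constant 0.00 GFLOPS output and the zero-byte file produced by `pick_best`:

```python
# Minimal sketch: count valid vs. failed measurements in the AutoTVM tuning log.
# Run it on the .tmp file written by autotvm.callback.log_to_file (keep a copy,
# since tune_tasks() deletes it at the end).
from collections import Counter

from tvm import autotvm

tmp_log_file = "resnet-18.log.tmp"  # assumed path to the per-trial tuning log

error_counts = Counter()
for inp, res in autotvm.record.load_from_file(tmp_log_file):
    error_counts[res.error_no] += 1  # error_no == 0 means the trial succeeded

valid = error_counts.get(0, 0)
total = sum(error_counts.values())
print("%d / %d trials produced valid records" % (valid, total))
print("error_no histogram:", dict(error_counts))
# Non-zero codes are listed in tvm.autotvm.measure.MeasureErrorNo
# (e.g. compile/runtime errors on the device, timeouts).
```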