Hi @kazum - Thank you for the previous suggestions, I am also looking at how to use autotvm to tune a model on iOS.
Below is a modified version of 'tutorials/autotvm/tune_relay_arm.py', based on your earlier suggestion of adding a build_func, but something isn't working quite right yet. The tuning tasks are stuck at 0 GFLOPS and the tuning trials time out:

> [Task 1/12] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/100) | 0.00 s

If I skip tuning (i.e. comment out the `tune_tasks(tasks, **tuning_opt)` call), it successfully builds and runs the untuned model and reports an inference result. Any idea what step might be missing here? Thank you!

0. Assumption: you have a single macOS-based host running the rpc proxy, the tracker and Xcode, with local network IP: 192.168.0.10

1. Set up the environment variables:

       export TVM_IOS_CODESIGN='Apple Development: <y...@email.com> (<SIGNINGCODE>)'
       export TVM_IOS_RPC_ROOT=${TVM_HOME}/apps/ios_rpc
       export TVM_IOS_RPC_PROXY_HOST=192.168.0.10
       #export TVM_IOS_RPC_DESTINATION='platform=iOS Simulator,id=<simulator id>'
       export TVM_IOS_RPC_DESTINATION='platform=iOS,id=<ios device id>'

2. Start the tracker:

       python3 -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 --no-fork
       INFO:RPCTracker:bind to 0.0.0.0:9190

3. Start the rpc proxy and point it to the tracker:

       python3 -m tvm.exec.rpc_proxy --host 0.0.0.0 --tracker 0.0.0.0:9190 --no-fork
       INFO:root:RPCProxy: client port bind to 0.0.0.0:9090

4. 
Run tuning: cd ${TVM_HOME}/apps/ios_rpc python3 tests/tune_relay_ios.py **Code:** ``` """ apps/ios_rpc/tests/tune_relay_ios.py Auto-tuning a convolutional network for iPhone CPU =============================================== """ import os import numpy as np import tvm from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner from tvm.contrib.util import tempdir import tvm.contrib.graph_runtime as runtime from tvm.contrib import xcode ################################################################# # Define network # -------------- # First we need to define the network in relay frontend API. # We can load some pre-defined network from :code:`relay.testing`. # We can also load models from MXNet, ONNX and TensorFlow. def get_network(name, batch_size): """Get the symbol definition and random weight of a network""" input_shape = (batch_size, 3, 224, 224) output_shape = (batch_size, 1000) if "resnet" in name: n_layer = int(name.split('-')[1]) mod, params = relay.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype) elif "vgg" in name: n_layer = int(name.split('-')[1]) mod, params = relay.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype) elif name == 'mobilenet': mod, params = relay.testing.mobilenet.get_workload(batch_size=batch_size) elif name == 'squeezenet_v1.1': mod, params = relay.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1', dtype=dtype) elif name == 'inception_v3': input_shape = (1, 3, 299, 299) mod, params = relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype) elif name == 'mxnet': # an example for mxnet model from mxnet.gluon.model_zoo.vision import get_model block = get_model('resnet18_v1', pretrained=True) mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype) net = mod["main"] net = relay.Function(net.params, 
relay.nn.softmax(net.body), None, net.type_params, net.attrs) mod = tvm.IRModule.from_expr(net) else: raise ValueError("Unsupported network: " + name) return mod, params, input_shape, output_shape ################################################################# # Start RPC Tracker # ------------------ # python3 -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 --no-fork # # - Autotvm will use the tracker to orchestrate tuning test runs. # # Start RPC Proxy # python3 -m tvm.exec.rpc_proxy --host 0.0.0.0 --tracker 0.0.0.0:9190 --no-fork ########################################### # Set Tuning Options # ------------------ #### DEVICE CONFIG #### # Set to be address of tvm proxy. proxy_host = os.environ["TVM_IOS_RPC_PROXY_HOST"] # Set your desination via env variable. # Should in format "platform=iOS,id=<the test device uuid>" destination = os.environ["TVM_IOS_RPC_DESTINATION"] device_key = 'iphone' proxy_port = 9090 arch = "arm64" sdk = "iphoneos" target = "llvm -mtriple=%s-apple-darwin" % arch target_host = "llvm -mtriple=%s-apple-darwin" % arch #### TUNING OPTION #### network = 'resnet-18' log_file = "%s.%s.log" % (device_key, network) dtype = 'float32' autotvm.measure.measure_methods.check_remote = lambda *args: True def fcompile(*args): xcode.create_dylib(*args, arch=arch, sdk=sdk) path = args[0] xcode.codesign(path) xcode.popen_test_rpc(proxy_host, proxy_port, device_key, destination=destination, libs=[path]) fcompile.output_format = "dylib" tuning_option = { 'log_filename': log_file, 'tuner': 'random', 'early_stopping': None, 'n_trial': 100, 'measure_option': autotvm.measure_option( builder=autotvm.LocalBuilder( n_parallel=1, build_func=fcompile, timeout=60 ), runner=autotvm.RPCRunner( device_key, host='127.0.0.1', port=9190, number=20, repeat=3, timeout=60, min_repeat_ms=150) ), } ################################################################### # Begin Tuning # ------------ def tune_tasks(tasks, measure_option, tuner='random', n_trial=1000, 
early_stopping=None, log_filename='tuning.log', use_transfer_learning=True): # create tmp log file tmp_log_file = log_filename + ".tmp" if os.path.exists(tmp_log_file): os.remove(tmp_log_file) for i, tsk in enumerate(reversed(tasks)): prefix = "[Task %2d/%2d] " % (i+1, len(tasks)) # create tuner if tuner == 'xgb' or tuner == 'xgb-rank': tuner_obj = XGBTuner(tsk, loss_type='rank') elif tuner == 'xgb_knob': tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob') elif tuner == 'ga': tuner_obj = GATuner(tsk, pop_size=50) elif tuner == 'random': tuner_obj = RandomTuner(tsk) elif tuner == 'gridsearch': tuner_obj = GridSearchTuner(tsk) else: raise ValueError("Invalid tuner: " + tuner) if use_transfer_learning: if os.path.isfile(tmp_log_file): tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file)) # do tuning tsk_trial = min(n_trial, len(tsk.config_space)) tuner_obj.tune(n_trial=tsk_trial, early_stopping=early_stopping, measure_option=measure_option, callbacks=[ autotvm.callback.progress_bar(tsk_trial, prefix=prefix), autotvm.callback.log_to_file(tmp_log_file) ]) # pick best records to a cache file autotvm.record.pick_best(tmp_log_file, log_filename) os.remove(tmp_log_file) ######################################################################## # Finally, we launch tuning jobs and evaluate the end-to-end performance. 
def tune_and_evaluate(tuning_opt):
    """Tune the conv2d tasks of the selected network, then time the tuned build.

    ``tuning_opt`` is expanded as keyword arguments into ``tune_tasks``. The
    network, target, log file and device settings all come from module-level
    configuration. After tuning, the model is compiled with the best records,
    exported as a signed dylib, loaded through the RPC tracker and its mean
    inference time is printed.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    conv_ops = (relay.op.get("nn.conv2d"),)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params, ops=conv_ops)

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

        # export library as a code-signed dylib
        path_dso = "tuned_deploy.dylib"
        lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk)
        xcode.codesign(path_dso)

        # Evaluate inference cost on tuned lib: launch the RPC test app with
        # the library attached, then request a session from the tracker.
        xcode.popen_test_rpc(
            proxy_host,
            proxy_port,
            device_key,
            destination=destination,
            libs=[path_dso],
        )
        remote = autotvm.measure.request_remote(
            device_key, '0.0.0.0', 9190, timeout=10000)

        # Upload not needed for ios because dylib is built into app
        # remote.upload(path_dso)
        rlib = remote.load_module(path_dso)

        ctx = remote.cpu(0)
        module = runtime.create(graph, rlib, ctx)
        random_input = np.random.uniform(size=input_shape)
        data_tvm = tvm.nd.array(random_input.astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=3, repeat=20)
        timings = ftimer().results
        prof_res = np.array(timings) * 1000  # convert to millisecond
        mean_ms = np.mean(prof_res)
        std_ms = np.std(prof_res)
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (mean_ms, std_ms))


# We do not run the tuning in our webpage server since it takes too long.
# Uncomment the following line to run it by yourself.

if __name__ == '__main__': if os.path.exists("rpc_config.txt"): os.remove("rpc_config.txt") tune_and_evaluate(tuning_option) ###################################################################### # Sample Output # ------------- ``` --- [Visit Topic](https://discuss.tvm.ai/t/auto-tvm-how-to-auto-tune-the-model-on-ios-device/7681/9) to respond. You are receiving this because you enabled mailing list mode. To unsubscribe from these emails, [click here](https://discuss.tvm.ai/email/unsubscribe/0943cdede918620ca51b342869bd4913819ecd289bd1bf9a73380a51cfbcfac5).