Hi Experts,
@tqchen, @kazum 

I tried to auto-tune the sample ResNet model on the iOS Metal target. I went 
through 
(https://discuss.tvm.apache.org/t/auto-tvm-how-to-auto-tune-the-model-on-ios-device/7681).
While trying to tune the model, every time I see **"Current/Best: 0.00/0.00 
GFLOPS"** in the logs, and yet it shows "Test Succeeded". I tried tuning for a while 
(n_trial=100) and compared the results before and after tuning the ResNet 
model. I didn't see any difference in optimization, and when picking the best 
records from the cache file (**autotvm.record.pick_best(tmp_log_file, 
log_filename)**), log_filename ends up with zero bytes (it did not pick any 
records from the tuned log file). Please find below the results for the ResNet 
model before and after tuning.

With Tuning Resnet model : Mean inference Time & std dev = 54.05 ms & 1.27 ms

Without Tuning Resnet model : Mean inference Time & std dev = 51.07 ms & 0.14 ms

**Code :** 

    # Build/tuning configuration for an iOS Metal device.
    target = 'metal'
    proxy_port = 9090   # port of the RPC proxy the iOS app connects to
    key = "iphone"      # device key registered with the RPC tracker/proxy
    arch = "arm64"
    sdk = "iphoneos"
    # Host-side code is cross-compiled for the Apple arm64 triple.
    target_host = "llvm -mtriple=%s-apple-darwin" % arch

    @tvm.register_func("tvm_callback_metal_compile")
    def compile_metal(src):
        """Compile Metal shader source with the Xcode toolchain for the configured SDK."""
        compiled = xcode.compile_metal(src, sdk=sdk)
        return compiled

    #### TUNING OPTION ####
    network = 'resnet-18'
    log_file = "%s.log" % network  # final log holding the best record per workload
    dtype = 'float32'

    # NOTE(review): monkey-patch that makes AutoTVM's remote-availability check
    # always succeed; presumably needed because the iOS device cannot pass the
    # standard tracker check (the dylib is baked into the test app) — confirm
    # this is still required for your TVM version.
    autotvm.measure.measure_methods.check_remote = lambda *args: True

    def fcompile(*args):
        """Build function for autotvm.LocalBuilder: link an iOS dylib and codesign it.

        args[0] is the output path produced by create_dylib; the signed dylib
        is what the measurement step loads on the device.
        """
        from tvm.contrib import xcode
        output_path = args[0]
        xcode.create_dylib(*args, arch=arch, sdk=sdk)
        xcode.codesign(output_path)

    # LocalBuilder reads this attribute to choose the artifact file extension.
    fcompile.output_format = "dylib"

    # Options passed to tune_tasks(); builder compiles candidates locally,
    # runner measures them on the device via the RPC tracker.
    tuning_option = {
        'log_filename': log_file,
        'tuner': 'xgb',
        'early_stopping': None,

        'measure_option': autotvm.measure_option(
            builder=autotvm.LocalBuilder(
                n_parallel=1,        # one build at a time (codesigning step)
                build_func=fcompile, # custom iOS dylib build + codesign
                timeout=60
            ),
            runner=autotvm.RPCRunner(
                key, host='127.0.0.1', port=9190,
                number=20, repeat=3, timeout=60, min_repeat_ms=150)
        ),
    }
    def tune_tasks(tasks,
                   measure_option,
                   tuner='xgb',
                   n_trial=100,
                   early_stopping=None,
                   log_filename='tuning.log',
                   use_transfer_learning=False):
        """Tune each extracted task and keep only the best record per workload.

        Tuning results are appended to a temporary file; at the end
        autotvm.record.pick_best() filters the best entry per workload into
        log_filename. If every measurement failed (the "0.00 GFLOPS" symptom),
        there is nothing valid to pick and log_filename ends up empty — inspect
        the MeasureResult errors in the .tmp file in that case.
        """
        # Create a fresh temporary log file.
        tmp_log_file = log_filename + ".tmp"
        if os.path.exists(tmp_log_file):
            os.remove(tmp_log_file)

        for i, tsk in enumerate(reversed(tasks)):
            prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

            # Create the requested tuner.
            if tuner in ('xgb', 'xgb-rank'):
                tuner_obj = XGBTuner(tsk, loss_type='rank')
            elif tuner == 'ga':
                tuner_obj = GATuner(tsk, pop_size=100)
            elif tuner == 'random':
                tuner_obj = RandomTuner(tsk)
            elif tuner == 'gridsearch':
                tuner_obj = GridSearchTuner(tsk)
            else:
                raise ValueError("Invalid tuner: " + tuner)

            if use_transfer_learning and os.path.isfile(tmp_log_file):
                # BUG FIX: in the pasted code this call was dedented to column 0
                # by e-mail line wrapping, which is a SyntaxError.
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

            # Do tuning — never request more trials than the config space holds.
            tsk_trial = min(n_trial, len(tsk.config_space))
            tuner_obj.tune(n_trial=tsk_trial,
                           early_stopping=early_stopping,
                           measure_option=measure_option,
                           callbacks=[
                               autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                               autotvm.callback.log_to_file(tmp_log_file)
                           ])

        # Pick the best records into the final cache file, then drop the temp file.
        autotvm.record.pick_best(tmp_log_file, log_filename)
        os.remove(tmp_log_file)


    ########################################################################
    # Finally, we launch tuning jobs and evaluate the end-to-end performance.

    def tune_and_evaluate(tuning_opt):
        """Extract conv2d tasks, tune them, then build and time the tuned model on device."""
        # Extract workloads from the relay program.
        print("Extract tasks...")
        mod, params, input_shape, out_shape = get_network(network, batch_size=1)

        tasks = autotvm.task.extract_from_program(mod["main"], target=target,
                                                  params=params,
                                                  target_host=target_host,
                                                  ops=(relay.op.get("nn.conv2d"),))

        # Run tuning tasks.
        print("Tuning...")
        tune_tasks(tasks, **tuning_opt)

        # BUG FIX: the build MUST happen inside apply_history_best(); in the
        # pasted code the `with` body was dedented, so relay.build ran without
        # the tuned records and the "tuned" model was identical to the untuned
        # one — matching the reported measurements.
        print("Compile...")
        with autotvm.apply_history_best(log_file):
            with tvm.transform.PassContext(opt_level=3):
                graph, lib, params = relay.build_module.build(
                    mod, target=target, params=params)

        # Export and codesign the library for the device.
        path_dso = "tuned_deploy.dylib"
        lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk)
        xcode.codesign(path_dso)

        # NOTE(review): proxy_host, device_key and destination are not defined
        # anywhere in this snippet (only proxy_port and key are) — confirm they
        # exist in the full script, otherwise this raises NameError.
        xcode.popen_test_rpc(proxy_host, proxy_port, device_key,
                             destination=destination, libs=[path_dso])

        remote = autotvm.measure.request_remote(device_key, '0.0.0.0', 9190,
                                                timeout=10000)

        # Upload is not needed on iOS because the dylib is built into the app.
        rlib = remote.load_module(path_dso)

        ctx = remote.metal(0)

        module = runtime.create(graph, rlib, ctx)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # Evaluate inference time cost.
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=3, repeat=20)
        prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
        # BUG FIX: the original print was missing its closing parenthesis.
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))

Any idea what is happening and how to fix the above issue?

Can you please help me understand and fix it?

Thanks





---
[Visit 
Topic](https://discuss.tvm.apache.org/t/ios-auto-tvm-auto-tuning-was-not-happening-for-ios-metal-target/8115/1)
 to respond.

You are receiving this because you enabled mailing list mode.

To unsubscribe from these emails, [click 
here](https://discuss.tvm.apache.org/email/unsubscribe/81c22eb560f2b886ce46c7e0b49d90c48e7cffdf1e055ead55047698d10fda56).

Reply via email to