vandersonmr <vanderson...@gmail.com> writes:
> A new hash map was added to store the accumulated execution > frequency of the TBs even after tb_flush events. A dump > function was also added as a way to visualize these frequencies. > > Signed-off-by: vandersonmr <vanderson...@gmail.com> I forgot to mention the formatting looks a little off here. It should be your full name (accents and all) before the email address, e.g: Signed-off-by: Alex Bennée <alex.ben...@linaro.org> > --- > accel/tcg/translate-all.c | 59 +++++++++++++++++++++++++++++++++++++++ > accel/tcg/translate-all.h | 2 ++ > exec.c | 7 +++++ > include/exec/exec-all.h | 3 ++ > include/exec/tb-context.h | 9 ++++++ > include/qom/cpu.h | 2 ++ > 6 files changed, 82 insertions(+) > > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 5d1e08b169..0bc670ffad 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1118,6 +1118,12 @@ static inline void code_gen_alloc(size_t tb_size) > } > } > > +static bool statistics_cmp(const void* ap, const void *bp) { > + const TBStatistics *a = ap; > + const TBStatistics *b = bp; > + return a->pc == b->pc && a->cs_base == b->cs_base && a->flags == > b->flags; Looking at tb_cmp() bellow this I wonder if we should also take into account the page_address values. Some TB's will be translated over a page boundary and in theory that can change with new mappings so we need to ensure they are really equivalent. > +} > + > static bool tb_cmp(const void *ap, const void *bp) > { > const TranslationBlock *a = ap; > @@ -1137,6 +1143,7 @@ static void tb_htable_init(void) > unsigned int mode = QHT_MODE_AUTO_RESIZE; > > qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); > + qht_init(&tb_ctx.tb_statistics, statistics_cmp, CODE_GEN_HTABLE_SIZE, > QHT_MODE_AUTO_RESIZE); > } > > /* Must be called before using the QEMU cpus. 'tb_size' is the size > @@ -1228,6 +1235,53 @@ static gboolean tb_host_size_iter(gpointer key, > gpointer value, gpointer data) > return false; > } > > +static void do_tb_dump_exec_freq(void *p, uint32_t hash, void *userp) > +{ > +#if TARGET_LONG_SIZE == 8 > + TBStatistics *tbs = p; > + printf("%016lx\t%lu\n", tbs->pc, tbs->total_exec_freq); > +#elif TARGET_LONG_SIZE == 4 > + TBStatistics *tbs = p; > + printf("%016x\t%lu\n", tbs->pc, tbs->total_exec_freq); > +#endif > +} This will need some fixing up so deal with output to the HMP monitor. We don't want to be directly spamming stdout with results. > + > +void tb_dump_all_exec_freq(void) > +{ > + tb_read_exec_freq(); > + qht_iter(&tb_ctx.tb_statistics, do_tb_dump_exec_freq, NULL); > +} I would be tempted to insert these into a sorted GList first so we can dump the first N hotblocks. > + > +static void do_tb_read_exec_freq(void *p, uint32_t hash, void *userp) > +{ > + TranslationBlock *tb = p; > + TBStatistics tbscmp; > + tbscmp.pc = tb->pc; > + tbscmp.cs_base = tb->cs_base; > + tbscmp.flags = tb->flags; > + > + TBStatistics *tbs = qht_lookup(userp, &tbscmp, hash); > + > + uint64_t exec_freq = tb_get_and_reset_exec_freq((TranslationBlock*) p); > + > + if (tbs) { > + tbs->total_exec_freq += exec_freq; > + } else { > + void *existing; > + tbs = malloc(sizeof(TBStatistics)); use g_new0(TBStatistics, 1) > + tbs->total_exec_freq = exec_freq; > + tbs->pc = tb->pc; > + tbs->cs_base = tb->cs_base; > + tbs->flags = tb->flags; > + qht_insert(userp, tbs, hash, &existing); > + } > +} > + Comment here about what we are doing? Maybe tb_copy_old_counts()? > +void tb_read_exec_freq(void) > +{ > + qht_iter(&tb_ctx.htable, do_tb_read_exec_freq, &tb_ctx.tb_statistics); > +} > + > /* flush all the translation blocks */ > static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) > { > @@ -1252,6 +1306,10 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data > tb_flush_count) > cpu_tb_jmp_cache_clear(cpu); > } > > + if (enable_freq_count) { > + tb_read_exec_freq(); > + } > + > qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE); > page_flush_tb(); > > @@ -1723,6 +1781,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > tb->flags = flags; > tb->cflags = cflags; > tb->trace_vcpu_dstate = *cpu->trace_dstate; > + tb->exec_freq = 0; > tcg_ctx->tb_cflags = cflags; > tb_overflow: > > diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h > index 64f5fd9a05..e13088c36d 100644 > --- a/accel/tcg/translate-all.h > +++ b/accel/tcg/translate-all.h > @@ -32,6 +32,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, > tb_page_addr_t end, > int is_cpu_write_access); > void tb_check_watchpoint(CPUState *cpu); > > +extern bool enable_freq_count; > + > #ifdef CONFIG_USER_ONLY > int page_unprotect(target_ulong address, uintptr_t pc); > #endif > diff --git a/exec.c b/exec.c > index e7622d1956..9b64a012ee 100644 > --- a/exec.c > +++ b/exec.c > @@ -1013,6 +1013,13 @@ const char *parse_cpu_option(const char *cpu_option) > return cpu_type; > } > > +uint64_t tb_get_and_reset_exec_freq(TranslationBlock *tb) > +{ > + uint64_t exec_freq = atomic_load_acquire(&tb->exec_freq); > + atomic_store_release(&tb->exec_freq, 0); I'm not sure you need acquire/release semantics here as you are only reading this in an exclusive period when all in-flight updates should be synced (locks are implicit barriers). Folding this up into do_tb_read_exec_freq might make this clearer. > + return exec_freq; > +} > + > #if defined(CONFIG_USER_ONLY) > void tb_invalidate_phys_addr(target_ulong addr) > { > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index a80407622e..5d3d829d18 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -513,4 +513,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, > /* vl.c */ > extern int singlestep; > > +void tb_read_exec_freq(void); > +void tb_dump_all_exec_freq(void); > + > #endif > diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h > index feb585e0a7..ba518d47a0 100644 > --- a/include/exec/tb-context.h > +++ b/include/exec/tb-context.h > @@ -28,6 +28,14 @@ > > typedef struct TranslationBlock TranslationBlock; > typedef struct TBContext TBContext; > +typedef struct TBStatistics TBStatistics; > + > +struct TBStatistics { > + target_ulong pc; > + target_ulong cs_base; > + uint32_t flags; > + uint64_t total_exec_freq; > +}; > > struct TBContext { > > @@ -35,6 +43,7 @@ struct TBContext { > > /* statistics */ > unsigned tb_flush_count; > + struct qht tb_statistics; > }; > > extern TBContext tb_ctx; > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index 5ee0046b62..593c1f1137 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -474,6 +474,8 @@ static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) > } > } > > +uint64_t tb_get_and_reset_exec_freq(struct TranslationBlock*); > + > /** > * qemu_tcg_mttcg_enabled: > * Check whether we are running MultiThread TCG or not. -- Alex Bennée