On June 22, 2016 2:37:04 PM GMT+02:00, Andi Kleen <a...@firstfloor.org> wrote: >From: Andi Kleen <a...@linux.intel.com> > >Using autofdo is currently something difficult. It requires using the >model specific branches taken event, which differs on different CPUs. >The example shown in the manual requires a special patched version of >perf that is non standard, and also will likely not work everywhere. > >This patch adds a new gcc-auto-profile script that figures out the >correct event and runs perf. > >This is needed to actually make use of autofdo in a generic way >in the build system and in the test suite. > >Since maintaining the script would be somewhat tedious (needs changes >every time a new CPU comes out) I auto generated it from the online >Intel event database. The script to do that is in contrib and can be >rerun. > >Right now there is no test if perf works in configure. This >would vary depending on the build and target system, and since >it currently doesn't work in virtualization and needs uptodate >kernel it may often fail in common distribution build setups. > >So far the script is not installed. > >v2: Remove documentation of gcc-auto-profile, as its not >installed. > >gcc/: >2016-06-22 Andi Kleen <a...@linux.intel.com> > > * doc/invoke.texi: Document gcc-auto-profile > * config/i386/gcc-auto-profile: New file. > >contrib/: > >2016-06-22 Andi Kleen <a...@linux.intel.com> > > * gen_autofdo_event.py: New file to regenerate > gcc-auto-profile. 
>--- >contrib/gen_autofdo_event.py | 155 >+++++++++++++++++++++++++++++++++++++++ > gcc/config/i386/gcc-auto-profile | 70 ++++++++++++++++++ > 2 files changed, 225 insertions(+) > create mode 100755 contrib/gen_autofdo_event.py > create mode 100755 gcc/config/i386/gcc-auto-profile > >diff --git a/contrib/gen_autofdo_event.py >b/contrib/gen_autofdo_event.py >new file mode 100755 >index 0000000..66cd613 >--- /dev/null >+++ b/contrib/gen_autofdo_event.py >@@ -0,0 +1,155 @@ >+#!/usr/bin/python >+# Generate Intel taken branches Linux perf event script for autofdo >profiling. >+ >+# Copyright (C) 2016 Free Software Foundation, Inc. >+# >+# GCC is free software; you can redistribute it and/or modify it under >+# the terms of the GNU General Public License as published by the Free >+# Software Foundation; either version 3, or (at your option) any later >+# version. >+# >+# GCC is distributed in the hope that it will be useful, but WITHOUT >ANY >+# WARRANTY; without even the implied warranty of MERCHANTABILITY or >+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public >License >+# for more details. >+# >+# You should have received a copy of the GNU General Public License >+# along with GCC; see the file COPYING3. If not see >+# <http://www.gnu.org/licenses/>. */ >+ >+# Run it with perf record -b -e EVENT program ... >+# The Linux Kernel needs to support the PMU of the current CPU, and >+# It will likely not work in VMs. >+# Add --all to print for all cpus, otherwise for current cpu. >+# Add --script to generate shell script to run correct event. >+# >+# Requires internet (https) access. This may require setting up a >proxy >+# with export https_proxy=... 
>+# >+import urllib2 >+import sys >+import json >+import argparse >+import collections >+ >+baseurl = "https://download.01.org/perfmon" >+ >+target_events = (u'BR_INST_RETIRED.NEAR_TAKEN', >+ u'BR_INST_EXEC.TAKEN', >+ u'BR_INST_RETIRED.TAKEN_JCC', >+ u'BR_INST_TYPE_RETIRED.COND_TAKEN') >+ >+ap = argparse.ArgumentParser() >+ap.add_argument('--all', '-a', help='Print for all CPUs', >action='store_true') >+ap.add_argument('--script', help='Generate shell script', >action='store_true') >+args = ap.parse_args() >+ >+eventmap = collections.defaultdict(list) >+ >+def get_cpu_str(): >+ with open('/proc/cpuinfo', 'r') as c: >+ vendor, fam, model = None, None, None >+ for j in c: >+ n = j.split() >+ if n[0] == 'vendor_id': >+ vendor = n[2] >+ elif n[0] == 'model' and n[1] == ':': >+ model = int(n[2]) >+ elif n[0] == 'cpu' and n[1] == 'family': >+ fam = int(n[3]) >+ if vendor and fam and model: >+ return "%s-%d-%X" % (vendor, fam, model), model >+ return None, None >+ >+def find_event(eventurl, model): >+ print >>sys.stderr, "Downloading", eventurl >+ u = urllib2.urlopen(eventurl) >+ events = json.loads(u.read()) >+ u.close() >+ >+ found = 0 >+ for j in events: >+ if j[u'EventName'] in target_events: >+ event = "cpu/event=%s,umask=%s/" % (j[u'EventCode'], >j[u'UMask']) >+ if u'PEBS' in j and j[u'PEBS'] > 0:
I'd have said if j.get(u'PEBS', 0) > 0: I.e. not use the default None for keys not in the dict, but zero, and test against that. I think that's more pythonic, but either way is fine. >+ event += "p" >+ if args.script: >+ eventmap[event].append(model) >+ else: >+ print j[u'EventName'], "event for model", model, "is", >event >+ found += 1 >+ return found >+ >+if not args.all: >+ cpu, model = get_cpu_str() >+ if not cpu: >+ sys.exit("Unknown CPU type") >+ >+url = baseurl + "/mapfile.csv" >+print >>sys.stderr, "Downloading", url >+u = urllib2.urlopen(url) >+found = 0 >+cpufound = 0 >+for j in u: >+ n = j.rstrip().split(',') >+ if len(n) >= 4 and (args.all or n[0] == cpu) and n[3] == "core": >+ if args.all: >+ vendor, fam, model = n[0].split("-") >+ model = int(model, 16) >+ cpufound += 1 >+ found += find_event(baseurl + n[2], model) >+u.close() >+ >+if args.script: >+ print '''#!/bin/sh >+# Profile workload for gcc profile feedback (autofdo) using Linux >perf. >+# Auto generated. To regenerate for new CPUs run >+# contrib/gen_autofdo_event.py --shell --all in gcc source >+ >+# usages: >+# gcc-auto-profile program (profile program and children) >+# gcc-auto-profile -a sleep X (profile all for X secs, may >need root) >+# gcc-auto-profile -p PID sleep X (profile PID) >+# gcc-auto-profile --kernel -a sleep X (profile kernel) >+# gcc-auto-profile --all -a sleep X (profile kernel and user space) >+ >+# Identify branches taken event for CPU. >+# >+ >+FLAGS=u >+ >+if [ "$1" = "--kernel" ] ; then >+ FLAGS=k >+ shift >+fi >+if [ "$1" = "--all" ] ; then >+ FLAGS=uk >+ shift >+fi Thanks for fixing the above! >+ >+if ! grep -q Intel /proc/cpuinfo ] ; then But here there is one bracket too many, unless my MUA is playing tricks on me. 
>+ echo >&2 "Only Intel CPUs supported" >+ exit 1 >+fi >+ >+if grep -q hypervisor /proc/cpuinfo ; then >+ echo >&2 "Warning: branch profiling may not be functional in VMs" >+fi >+ >+case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo && >+ egrep "^model\s*:" /proc/cpuinfo | head -n1` in''' Please use $() here instead of backticks. >+ for event, mod in eventmap.iteritems(): IIRC iteritems is deprecated. >+ for m in mod[:-1]: >+ print "model*:\ %s|\\" % m >+ print 'model*:\ %s) E="%s$FLAGS" ;;' % (mod[-1], event) >+ print '''*) >+echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script >to update script." >+ exit 1 ;;''' >+ print "esac" >+ print 'exec perf record -e $E -b "$@"' Does $E need to be quoted here? >+ >+if cpufound == 0 and not args.all: >+ sys.exit('CPU %s not found' % cpu) >+ >+if found == 0: >+ sys.exit('Branch event not found') >diff --git a/gcc/config/i386/gcc-auto-profile >b/gcc/config/i386/gcc-auto-profile >new file mode 100755 >index 0000000..f60cefb >--- /dev/null >+++ b/gcc/config/i386/gcc-auto-profile >@@ -0,0 +1,70 @@ >+#!/bin/sh >+# profile workload for gcc profile feedback (autofdo) using Linux perf >+# auto generated. to regenerate for new CPUs run >+# contrib/gen_autofdo_event.py --shell --all in gcc source >+ >+# usages: >+# gcc-auto-profile program (profile program and children) >+# gcc-auto-profile -a sleep X (profile all for X secs, may >need root) >+# gcc-auto-profile -p PID sleep X (profile PID) >+# gcc-auto-profile --kernel -a sleep X (profile kernel) >+# gcc-auto-profile --all -a sleep X (profile kernel and user space) >+ >+# identify branches taken event for CPU >+# >+ >+FLAGS=u >+ >+if [ "$1" = "--kernel" ] ; then >+ FLAGS=k >+ shift >+fi >+if [ "$1" = "--all" ] ; then >+ FLAGS=uk >+ shift >+fi >+ >+if ! grep -q Intel /proc/cpuinfo ] ; then I'm surprised this even runs? 
thanks, >+ echo >&2 "Only Intel CPUs supported" >+ exit 1 >+fi >+ >+if grep -q hypervisor /proc/cpuinfo ; then >+ echo >&2 "Warning: branch profiling may not be functional in VMs" >+fi >+ >+case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo && >+ egrep "^model\s*:" /proc/cpuinfo | head -n1` in >+model*:\ 55|\ >+model*:\ 77|\ >+model*:\ 76) E="cpu/event=0xC4,umask=0xFE/p$FLAGS" ;; >+model*:\ 42|\ >+model*:\ 45|\ >+model*:\ 58|\ >+model*:\ 62|\ >+model*:\ 60|\ >+model*:\ 69|\ >+model*:\ 70|\ >+model*:\ 63|\ >+model*:\ 61|\ >+model*:\ 71|\ >+model*:\ 86|\ >+model*:\ 78|\ >+model*:\ 94) E="cpu/event=0xC4,umask=0x20/p$FLAGS" ;; >+model*:\ 46|\ >+model*:\ 30|\ >+model*:\ 31|\ >+model*:\ 26|\ >+model*:\ 47|\ >+model*:\ 37|\ >+model*:\ 44) E="cpu/event=0x88,umask=0x40/p$FLAGS" ;; >+model*:\ 28|\ >+model*:\ 38|\ >+model*:\ 39|\ >+model*:\ 54|\ >+model*:\ 53) E="cpu/event=0x88,umask=0x41/p$FLAGS" ;; >+*) >+echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script >to update script." >+ exit 1 ;; >+esac >+exec perf record -e $E -b "$@"