On Wed, Nov 25, 2015 at 2:31 AM, James Greenhalgh <james.greenha...@arm.com> wrote: > On Tue, Nov 17, 2015 at 02:10:35PM -0800, Andrew Pinski wrote: >> >> Because the imp and parts are really integer rather than strings, this patch >> moves the comparisons to be integer. Also allows saving around integers are >> easier than doing string comparisons. This allows for the next change. >> >> The way I store BIG.little is (big<<12)|little as each part num is only >> 12bits >> long. So it would be nice if someone could test -mpu=native on a big.little >> system to make sure it works still. >> >> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. >> >> Thanks, >> Andrew Pinski >> >> >> >> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer >> constants. >> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change >> implementer_id to unsigned char. >> Change part_no to unsigned int. >> (AARCH64_BIG_LITTLE): New define. >> (INVALID_IMP): New define. >> (INVALID_CORE): New define. >> (cpu_data): Change the last element's implementer_id and part_no to integers. >> (valid_bL_string_p): Rewrite to .. >> (valid_bL_core_p): this for integers instead of strings. >> (parse_field): New function. >> (contains_string_p): Rewrite to ... >> (contains_core_p): this for integers and only for the part_no. >> (host_detect_local_cpu): Rewrite handling of implementation and part num to >> be integers; >> simplifying the code. >> --- >> gcc/config/aarch64/aarch64-cores.def | 25 +++++----- >> gcc/config/aarch64/driver-aarch64.c | 90 >> ++++++++++++++++++++---------------- >> 2 files changed, 62 insertions(+), 53 deletions(-) >> >> diff --git a/gcc/config/aarch64/aarch64-cores.def >> b/gcc/config/aarch64/aarch64-cores.def >> index 0b456f7..798f3e3 100644 >> --- a/gcc/config/aarch64/aarch64-cores.def >> +++ b/gcc/config/aarch64/aarch64-cores.def >> @@ -33,25 +33,26 @@ >> This need not include flags implied by the architecture. >> COSTS is the name of the rtx_costs routine to use. >> IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it >> can >> - be found in /proc/cpuinfo. >> + be found in /proc/cpuinfo. There is a list in the ARM ARM. > > Let's be precise with this comment. > > "A partial list of implementer IDs is given in the ARM Architecture > Reference Manual ARMv8, for ARMv8-A architecture profile" > >> PART is the part number of the CPU. On a GNU/Linux system it can be >> found >> - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of >> - "<big core part number>.<LITTLE core part number>". */ >> + in /proc/cpuinfo. For big.LITTLE systems this should use the macro >> AARCH64_BIG_LITTLE >> + where the big part number comes as the first arugment to the macro and >> little is the > > s/arugment/argument/. > >> + second. */ >> >> /* V8 Architecture Processors. */ >> >> -AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") >> -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") >> -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") >> -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") >> -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800") >> -AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") >> -AARCH64_CORE("xgene1", xgene1, xgene1, 8A, >> AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") >> +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03) >> +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07) >> +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08) >> +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001) >> +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800) >> +AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1) >> +AARCH64_CORE("xgene1", xgene1, xgene1, 8A, >> AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000) >> >> /* V8 big.LITTLE implementations. */ >> >> -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, >> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") >> -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, >> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") >> +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, >> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, >> AARCH64_BIG_LITTLE(0xd07, 0xd03)) >> +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, >> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, >> AARCH64_BIG_LITTLE(0xd08, 0xd03)) >> >> >> #undef AARCH64_CORE >> diff --git a/gcc/config/aarch64/driver-aarch64.c >> b/gcc/config/aarch64/driver-aarch64.c >> index d03654c..92388a9 100644 >> --- a/gcc/config/aarch64/driver-aarch64.c >> +++ b/gcc/config/aarch64/driver-aarch64.c >> @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] = >> struct aarch64_core_data >> { >> const char* name; >> - const char* arch; >> - const char* implementer_id; >> - const char* part_no; >> + const char *arch; >> + unsigned char implementer_id; /* Exactly 8 bits */ > > Can we redesign this around the most general case - big.LITTLE cores with > different implementer IDs? There is nothing in the architecture that would > forbid this, and Samsung recently announced that their Exynos 8 Octa 8890 > which would support a combination of four of their custom cores with four ARM > Cortex-A53 cores. > > ( > http://www.samsung.com/semiconductor/minisite/Exynos/w/mediacenter.html#?v=news_Samsung_Unveils_a_New_Exynos_8_Octa_Processor_based_on_advanced_14-Nanometer_FinFET_Process_Technology > )
This should be done a separate patch as even the current infrastructure does not support it at all. It would be a simple one though. Change implementer_id to int and then encode the two implementer_id in cores_data. Though the parsing part becomes more complex as the current parsing does not understand if a new core has been started or not. This is why it should be a separate patch instead. Maybe to support that one it is not about parsing /proc/cpuinfo but rather /sys/cpu/* instead which is supported for Linux 4.4 (or did not make it into 4.4 so 4.5). Thanks, Andrew > >> + unsigned int part_no; /* 12 bits + 12 bits */ >> }; >> >> +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \ >> + (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu)) >> +#define INVALID_IMP ((unsigned char) -1) >> +#define INVALID_CORE ((unsigned)-1) >> + >> #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, >> PART) \ >> { CORE_NAME, #ARCH, IMP, PART }, >> >> static struct aarch64_core_data cpu_data [] = >> { >> #include "aarch64-cores.def" >> - { NULL, NULL, NULL, NULL } >> + { NULL, NULL, INVALID_IMP, INVALID_CORE } >> }; >> >> >> @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id) >> should return true. */ >> >> static bool >> -valid_bL_string_p (const char** core, const char* bL_string) >> +valid_bL_core_p (unsigned int *core, unsigned int bL_core) >> { >> - return strstr (bL_string, core[0]) != NULL >> - && strstr (bL_string, core[1]) != NULL; >> + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core; >> } >> >> -/* Return true iff ARR contains STR in one of its two elements. */ >> >> -static bool >> -contains_string_p (const char** arr, const char* str) >> +/* Returns the integer that is after ':' for the field. */ >> +static unsigned parse_field (const char *field) >> { >> - bool res = false; >> + const char *rest = strchr (field, ':'); >> + char *after; >> + unsigned fint = strtol (rest+1, &after, 16); >> + if (after == rest+1) > > "rest + 1" in both cases. > >> + return -1; >> + return fint; >> +} >> + >> +/* Return true iff ARR contains CORE, in either of the two elements. */ >> >> - if (arr[0] != NULL) >> +static bool >> +contains_core_p (unsigned *arr, unsigned core) >> +{ >> + if (arr[0] != INVALID_CORE) >> { >> - res = strstr (arr[0], str) != NULL; >> - if (res) >> - return res; >> + if (arr[0] == core) >> + return true; >> >> - if (arr[1] != NULL) >> - return strstr (arr[1], str) != NULL; >> + if (arr[1] != INVALID_CORE) >> + return arr[1] == core; >> } >> >> return false; >> @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv) >> bool cpu = false; >> unsigned int i = 0; >> unsigned int core_idx = 0; >> - const char* imps[2] = { NULL, NULL }; >> - const char* cores[2] = { NULL, NULL }; >> + unsigned char imp = INVALID_IMP; >> + unsigned int cores[2] = { INVALID_CORE, INVALID_CORE }; >> unsigned int n_cores = 0; >> - unsigned int n_imps = 0; >> bool processed_exts = false; >> const char *ext_string = ""; >> >> @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv) >> { >> if (strstr (buf, "implementer") != NULL) >> { >> - for (i = 0; cpu_data[i].name != NULL; i++) >> - if (strstr (buf, cpu_data[i].implementer_id) != NULL >> - && !contains_string_p (imps, cpu_data[i].implementer_id)) >> - { >> - if (n_imps == 2) >> - goto not_found; >> - >> - imps[n_imps++] = cpu_data[i].implementer_id; >> - >> - break; >> - } >> - continue; >> + unsigned cimp = parse_field (buf); >> + if (cimp == INVALID_IMP) >> + goto not_found; >> + >> + if (imp == INVALID_IMP) >> + imp = cimp; >> + /* BIG.little implementers are always equal. */ > > See my comment above. This comment does not neccessarily hold. > > In addition, this needs updating for Kyrill's comments. > > Thanks, > James >