On Tue, Nov 17, 2015 at 02:10:35PM -0800, Andrew Pinski wrote:
>
> Because the imp and parts are really integer rather than strings, this patch
> moves the comparisons to be integer. Also allows saving around integers are
> easier than doing string comparisons. This allows for the next change.
>
> The way I store BIG.little is (big<<12)|little as each part num is only 12bits
> long. So it would be nice if someone could test -mpu=native on a big.little
> system to make sure it works still.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> Thanks,
> Andrew Pinski
>
>
>
> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer
> constants.
> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change
> implementer_id to unsigned char.
> Change part_no to unsigned int.
> (AARCH64_BIG_LITTLE): New define.
> (INVALID_IMP): New define.
> (INVALID_CORE): New define.
> (cpu_data): Change the last element's implementer_id and part_no to integers.
> (valid_bL_string_p): Rewrite to ..
> (valid_bL_core_p): this for integers instead of strings.
> (parse_field): New function.
> (contains_string_p): Rewrite to ...
> (contains_core_p): this for integers and only for the part_no.
> (host_detect_local_cpu): Rewrite handling of implementation and part num to
> be integers;
> simplifying the code.
> ---
> gcc/config/aarch64/aarch64-cores.def | 25 +++++-----
> gcc/config/aarch64/driver-aarch64.c | 90
> ++++++++++++++++++++----------------
> 2 files changed, 62 insertions(+), 53 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def
> b/gcc/config/aarch64/aarch64-cores.def
> index 0b456f7..798f3e3 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -33,25 +33,26 @@
> This need not include flags implied by the architecture.
> COSTS is the name of the rtx_costs routine to use.
> IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can
> - be found in /proc/cpuinfo.
> + be found in /proc/cpuinfo. There is a list in the ARM ARM.
Let's be precise with this comment.
"A partial list of implementer IDs is given in the ARM Architecture
Reference Manual ARMv8, for ARMv8-A architecture profile"
> PART is the part number of the CPU. On a GNU/Linux system it can be found
> - in /proc/cpuinfo. For big.LITTLE systems this should have the form at of
> - "<big core part number>.<LITTLE core part number>". */
> + in /proc/cpuinfo. For big.LITTLE systems this should use the macro
> AARCH64_BIG_LITTLE
> + where the big part number comes as the first arugment to the macro and
> little is the
s/arugment/argument/.
> + second. */
>
> /* V8 Architecture Processors. */
>
> -AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
> -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
> -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
> -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
> -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
> -AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
> -AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8,
> xgene1, "0x50", "0x000")
> +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
> +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
> +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
> +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
> +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
> +AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1)
> +AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8,
> xgene1, 0x50, 0x000)
>
> /* V8 big.LITTLE implementations. */
>
> -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A,
> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
> -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A,
> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
> +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A,
> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41,
> AARCH64_BIG_LITTLE(0xd07, 0xd03))
> +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A,
> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41,
> AARCH64_BIG_LITTLE(0xd08, 0xd03))
>
>
> #undef AARCH64_CORE
> diff --git a/gcc/config/aarch64/driver-aarch64.c
> b/gcc/config/aarch64/driver-aarch64.c
> index d03654c..92388a9 100644
> --- a/gcc/config/aarch64/driver-aarch64.c
> +++ b/gcc/config/aarch64/driver-aarch64.c
> @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] =
> struct aarch64_core_data
> {
> const char* name;
> - const char* arch;
> - const char* implementer_id;
> - const char* part_no;
> + const char *arch;
> + unsigned char implementer_id; /* Exactly 8 bits */
Can we redesign this around the most general case - big.LITTLE cores with
different implementer IDs? There is nothing in the architecture that would
forbid this, and Samsung recently announced that their Exynos 8 Octa 8890
which would support a combination of four of their custom cores with four ARM
Cortex-A53 cores.
(
http://www.samsung.com/semiconductor/minisite/Exynos/w/mediacenter.html#?v=news_Samsung_Unveils_a_New_Exynos_8_Octa_Processor_based_on_advanced_14-Nanometer_FinFET_Process_Technology
)
> + unsigned int part_no; /* 12 bits + 12 bits */
> };
>
> +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
> + (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
> +#define INVALID_IMP ((unsigned char) -1)
> +#define INVALID_CORE ((unsigned)-1)
> +
> #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP,
> PART) \
> { CORE_NAME, #ARCH, IMP, PART },
>
> static struct aarch64_core_data cpu_data [] =
> {
> #include "aarch64-cores.def"
> - { NULL, NULL, NULL, NULL }
> + { NULL, NULL, INVALID_IMP, INVALID_CORE }
> };
>
>
> @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id)
> should return true. */
>
> static bool
> -valid_bL_string_p (const char** core, const char* bL_string)
> +valid_bL_core_p (unsigned int *core, unsigned int bL_core)
> {
> - return strstr (bL_string, core[0]) != NULL
> - && strstr (bL_string, core[1]) != NULL;
> + return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core;
> }
>
> -/* Return true iff ARR contains STR in one of its two elements. */
>
> -static bool
> -contains_string_p (const char** arr, const char* str)
> +/* Returns the integer that is after ':' for the field. */
> +static unsigned parse_field (const char *field)
> {
> - bool res = false;
> + const char *rest = strchr (field, ':');
> + char *after;
> + unsigned fint = strtol (rest+1, &after, 16);
> + if (after == rest+1)
"rest + 1" in both cases.
> + return -1;
> + return fint;
> +}
> +
> +/* Return true iff ARR contains CORE, in either of the two elements. */
>
> - if (arr[0] != NULL)
> +static bool
> +contains_core_p (unsigned *arr, unsigned core)
> +{
> + if (arr[0] != INVALID_CORE)
> {
> - res = strstr (arr[0], str) != NULL;
> - if (res)
> - return res;
> + if (arr[0] == core)
> + return true;
>
> - if (arr[1] != NULL)
> - return strstr (arr[1], str) != NULL;
> + if (arr[1] != INVALID_CORE)
> + return arr[1] == core;
> }
>
> return false;
> @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv)
> bool cpu = false;
> unsigned int i = 0;
> unsigned int core_idx = 0;
> - const char* imps[2] = { NULL, NULL };
> - const char* cores[2] = { NULL, NULL };
> + unsigned char imp = INVALID_IMP;
> + unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
> unsigned int n_cores = 0;
> - unsigned int n_imps = 0;
> bool processed_exts = false;
> const char *ext_string = "";
>
> @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv)
> {
> if (strstr (buf, "implementer") != NULL)
> {
> - for (i = 0; cpu_data[i].name != NULL; i++)
> - if (strstr (buf, cpu_data[i].implementer_id) != NULL
> - && !contains_string_p (imps, cpu_data[i].implementer_id))
> - {
> - if (n_imps == 2)
> - goto not_found;
> -
> - imps[n_imps++] = cpu_data[i].implementer_id;
> -
> - break;
> - }
> - continue;
> + unsigned cimp = parse_field (buf);
> + if (cimp == INVALID_IMP)
> + goto not_found;
> +
> + if (imp == INVALID_IMP)
> + imp = cimp;
> + /* BIG.little implementers are always equal. */
See my comment above. This comment does not neccessarily hold.
In addition, this needs updating for Kyrill's comments.
Thanks,
James