Re: [PATCH 4/4] Wire X-Gene 1 up in the ARM (32bit) backend as a AArch32-capable core.
On 12/01/15 20:15, Philipp Tomsich wrote: --- gcc/ChangeLog-2014| 10 ++ gcc/config/arm/arm-cores.def | 1 + gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md| 3 ++- gcc/config/arm/arm.c | 22 ++ gcc/config/arm/arm.md | 11 +-- gcc/config/arm/bpabi.h| 2 ++ gcc/config/arm/t-arm | 1 + gcc/doc/invoke.texi | 3 ++- 9 files changed, 52 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog-2014 b/gcc/ChangeLog-2014 index dd49d7f..c3c62db 100644 --- a/gcc/ChangeLog-2014 +++ b/gcc/ChangeLog-2014 @@ -3497,6 +3497,16 @@ 63965. * config/rs6000/rs6000.c: Likewise. +2014-12-23 Philipp Tomsich + + * config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list. + Include xgene1.md. + * config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1. + * config/arm/arm-cores.def (xgene1): New entry. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1. + 2014-11-22 Jan Hubicka PR ipa/63671 diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index be125ac..fa13eb9 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -167,6 +167,7 @@ ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, FL_LDSCHED | /* V8 Architecture Processors */ ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) ARM_CORE("cortex-a57", cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) +ARM_CORE("xgene1", xgene1,xgene1, 8A, FL_LDSCHED, xgene1) /* V8 big.LITTLE implementations */ ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ece9d5e..1392429 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -310,6 +310,9 @@ EnumValue Enum(processor_type) String(cortex-a57) Value(cortexa57) EnumValue +Enum(processor_type) String(xgene1) Value(xgene1) + +EnumValue 
Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) Enum diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 452820ab..dcd5054 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -32,5 +32,6 @@ cortexr4f,cortexr5,cortexr7, cortexm7,cortexm4,cortexm3, marvell_pj4,cortexa15cortexa7,cortexa17cortexa7, - cortexa53,cortexa57,cortexa57cortexa53" + cortexa53,cortexa57,xgene1, + cortexa57cortexa53" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8ca2dd8..14c8a87 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1903,6 +1903,25 @@ const struct tune_params arm_cortex_a57_tune = ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; +const struct tune_params arm_xgene1_tune = +{ + arm_9e_rtx_costs, + &xgene1_extra_costs, + NULL,/* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true,/* Prefer LDRD/STRD. */ + {true, true},/* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false, /* Prefer Neon for 64-bits bitops. */ + true, true, /* Prefer 32-bit encodings. */ + false, /* Prefer Neon for stringops. */ + 32 /* Maximum insns to inline memset. */ +}; + /* Branches can be dual-issued on Cortex-A5, so conditional execution is less appealing. Set max_insns_skipped to a low value. */ @@ -27066,6 +27085,9 @@ arm_issue_rate (void) { switch (arm_tune) { +case xgene1: + return 4; + case cortexa15: case cortexa57: return 3; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index c61057f..a3cbf3b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -109,6 +109,11 @@ ;; given instruction does not shift one of its input operands. 
(define_attr "shift" "" (const_int 0)) +;; [For compatibility with AArch64 in pipeline models] +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +(define_attr "fp" "no,yes" (const_string "no")) + ; Floating Point Unit. If we only have floatin
Re: [PATCH 3/4] Change the type of the prefetch-instructions to 'prefetch'.
On 12/01/15 20:15, Philipp Tomsich wrote: --- gcc/config/aarch64/aarch64.md | 2 +- gcc/config/arm/types.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 1f6b1b6..98f4f30 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -391,7 +391,7 @@ return pftype[INTVAL(operands[1])][locality]; } - [(set_attr "type" "load1")] + [(set_attr "type" "prefetch")] ) (define_insn "trap" diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index d368446..088c21a 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -118,6 +118,7 @@ ; mvn_shift_reg inverting move instruction, shifted operand by a register. ; no_insnan insn which does not represent an instruction in the ;final output, thus having no impact on scheduling. +; prefetch a prefetch instruction ; rbit reverse bits. ; revreverse bytes. ; sdiv signed division. @@ -556,6 +557,7 @@ call,\ clz,\ no_insn,\ + prefetch,\ csel,\ crc,\ extend,\ Can you follow up with a patch that adjusts the "prefetch" insn attribute in the ARM backend and uses this consistently in all the pipeline descriptions (i.e. treat this equivalent to load1 and make sure the compiler builds for AArch32 afterwards) ? It would be complete to do the same for all the pipeline descriptions in the AArch64 backend too. Ramana
Re: [patch 1/2][ARM]: New CPU support for Marvell Whitney
On 09/01/2015 19:22, Kyrill Tkachov wrote: Hi Xingxing, On 19/12/14 11:01, Xingxing Pan wrote: +/* Return true if vector element size is byte. */ Minor nit: two spaces after full stop and before */ Same in other places in the patch. +bool +marvell_whitney_vector_element_size_is_byte (rtx insn) +{ + if (GET_CODE (PATTERN (insn)) == SET) +{ + if ((GET_MODE (SET_DEST (PATTERN (insn))) == V8QImode) || + (GET_MODE (SET_DEST (PATTERN (insn))) == V16QImode)) + return true; +} + + return false; +} I see this is called from inside marvell-whitney.md. It seems to me that this function takes RTX insns. Can the type of this be strengthened to rtx_insn * ? Also, this should be refactored and written a bit more generally by checking for VECTOR_MODE_P and then GET_MODE_INNER for QImode, saving you the trouble of enumerating the different vector QI modes. + +/* Return true if INSN has shift operation but is not a shift insn. */ +bool +marvell_whitney_non_shift_with_shift_operand (rtx insn) Similar comment. Can this be strengthened to rtx_insn * ? Thanks, Kyrill +{ + rtx pat = PATTERN (insn); + + if (GET_CODE (pat) != SET) +return false; + + /* Is not a shift insn. */ + rtx rvalue = SET_SRC (pat); + RTX_CODE code = GET_CODE (rvalue); + if (code == ASHIFT || code == ASHIFTRT + || code == LSHIFTRT || code == ROTATERT) +return false; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, rvalue, ALL) +{ + /* Has shift operation. */ + RTX_CODE code = GET_CODE (*iter); + if (code == ASHIFT || code == ASHIFTRT + || code == LSHIFTRT || code == ROTATERT) +return true; +} + + return false; +} Hi Kyrill, Thanks for advice. Refactored patch is attached. -- Regards, Xingxing commit 3627056607b1e8604ac8d85ed44fdc7d3209cd3e Author: Xingxing Pan Date: Thu Dec 18 16:58:05 2014 +0800 2015-01-13 Xingxing Pan * config/arm/arm-cores.def: Add new core marvell-whitney. * config/arm/arm-protos.h: (marvell_whitney_vector_mode_qi): Declare. (marvell_whitney_inner_shift): Ditto. 
* config/arm/arm-tables.opt: Regenerated. * config/arm/arm-tune.md: Regenerated. * config/arm/arm.c (arm_marvell_whitney_tune): New structure. (arm_issue_rate): Add marvell_whitney. (marvell_whitney_vector_mode_qi): New function. (marvell_whitney_inner_shift): Ditto. * config/arm/arm.md: Include marvell-whitney.md. (generic_sched): Add marvell_whitney. (generic_vfp): Ditto. * config/arm/bpabi.h (BE8_LINK_SPEC): Add marvell-whitney. * config/arm/t-arm (MD_INCLUDES): Add marvell-whitney.md. * config/arm/marvell-whitney.md: New file. * doc/invoke.texi: Document marvell-whitney. diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 6fa5d99..26eb7ab 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -159,6 +159,7 @@ ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED, cortex_m7) ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) +ARM_CORE("marvell-whitney", marvell_whitney, marvell_whitney, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, marvell_whitney) /* V7 big.LITTLE implementations */ ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index fc45348..45001ae 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -231,6 +231,9 @@ extern void arm_order_regs_for_local_alloc (void); extern int arm_max_conditional_execute (); +extern bool marvell_whitney_vector_mode_qi (rtx_insn *insn); +extern bool marvell_whitney_inner_shift (rtx_insn *insn); + /* Vectorizer cost model implementation. 
*/ struct cpu_vec_costs { const int scalar_stmt_cost; /* Cost of any scalar operation, excluding diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ece9d5e..dc5f364 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -298,6 +298,9 @@ EnumValue Enum(processor_type) String(marvell-pj4) Value(marvell_pj4) EnumValue +Enum(processor_type) String(marvell-whitney) Value(marvell_whitney) + +EnumValue Enum(processor_type) String(cortex-a15.cortex-a7) Value(cortexa15cortexa7) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 452820ab..c73c33c 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -31,6 +31,7 @@ cortexa15,cortexa17,cortexr4, cortexr4f,cortexr5,cortexr7, cortexm7,cortexm4,cortexm3, - marvell_pj4,cortexa15cortexa7,cortexa17cortexa7, - cortexa53,cortexa57,cortexa57cortexa53" + marvell_pj4,marvell_whitney
Re: [RFC PATCH Fortran] make enum_9/10.f90 testcases work under FreeBSD ARM
On Sun, Jan 11, 2015 at 9:55 PM, Andreas Tobler wrote: > Hi, > > I have here a possible way to make the enum_9.f90 and the enum_10.f90 work > under arm*-*-freebsd*. The solution for enum_9.f90 is straight forward. But > the one for enum_10.f90 requires a reordering of the dg-additional-sources > line. This I do not understand yet, but maybe one of you does. > > If I have the original ordering and change the dg-options to check on > 'target arm_eabi' I get strange compilation errors: Any patches that convert arm*-*-linux* to arm_eabi can be considered as obvious as long as you test them on an EABI compliant platform which freebsd appears to be getting towards. I haven't looked too deeply about the other "ordering" issue you mention here. regards Ramana > > --- > /build/gcc/obj_gcc_armv6/gcc/testsuite/gfortran/../../gfortran > -B/build/gcc/obj_gcc_armv6/gcc/testsuite/gfortran/../../ > -B/build/gcc/obj_gcc_armv6/armv6-unknown-freebsd11.0/./libgfortran/ > -fno-diagnostics-show-caret -fdiagnostics-color=never ./enum_10.c -c -o > arm_eabi89728.o arm_eabi89728.c^M > gfortran: fatal error: cannot specify -o with -c, -S or -E with multiple > files^M > compilation terminated.^M > --- > > The -c comes after the enum_10.c > > Attached the solution which makes the tests pass. I tested them under > FreeBSD armv6-*-freebsd11.0 and x86_64-unknown-freebsd11.0. Also under > x86_64-unknown-linux-gnu. All PASS. > > Would this be ok for trunk? > > TIA, > Andreas > > 2015-01-11 Andreas Tobler > > * gfortran.dg/enum_9.f90: Replace arm*-*-linux* with arm_eabi. > * gfortran.dg/enum_10.f90: Likewise. Reorder dg-additional-sources.
Re: [patch, arm] Minor optimization on thumb2 tail call
On 19/11/14 02:43, Joey Ye wrote: Current thumb2 -Os generates suboptimal code for following tail call case: int f4(int b, int a, int c, int d); int g(int a, int b, int c, int d) { return f4(b, a, c, d); } arm-none-eabi-gcc -Os -mthumb -mcpu=cortex-m3 test.c push {r4, lr} mov r4, r1 mov r1, r0 mov r0, r4 pop {r4, lr} b f4 There are two issues: The first one is that saving/restoring lr is not necessary, as there is no return via pop pc. The second one is that even if we managed to avoid lr push/pop, ldmia.w sp!, {r4} is still emitted as there is a missing pattern for pop single and code size is not optimal. This patch fixes these two issues and introduces a shared test case. CSiBE thumb2 -Os shows cross board code size reduction, except for one case with 4 bytes regression. The case is like: void f () { if () ... else if () ... else g(); } There are N=2 non-sibcall returns and S=1 sibcall return. Originally the non-sibcall returns are just pop {r4, r5, pc}, now they become b.n .Lreturn .Lreturn: pop {r4, r5} bx lr The one byte save from sibcall return does not win the non-sibcall return regressions back. In general scenario, number of N non-sibcall returns use b.n branching to merged tail, number of S sibcalls save 2 bytes by avoid poping lr. It results in 4-2*S bytes regression. In the worst scenario, each non-sibcall return has to use b.w branching to merged tail, resulting in (N-S)*2 bytes regression. The worst scenario is rare, according to CSiBE. The general regression scenario can only regress 2 bytes at most. So I would not introduce additional complexity to handle the regression case. Make check cortex-m3: pass thumb2 bootstrap (O2/Os): pass * config/arm/arm.c (arm_compute_save_reg_mask): Do not save lr in case of tail call. * config/arm/thumb2.md (*thumb2_pop_single): New pattern. * gcc.target/arm/thumb2-pop-single.c: New test. 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4f04707..20d0b9e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19190,6 +19190,7 @@ arm_compute_save_reg_mask (void) || (save_reg_mask && optimize_size && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->tail_call_emit && !crtl->calls_eh_return)) save_reg_mask |= 1 << LR_REGNUM; diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 64acfea..29cfb17 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -267,6 +267,17 @@ (set_attr "type" "multiple")] ) +;; Pop a single register as its size is preferred over a post-incremental load +(define_insn "*thumb2_pop_single" + [(set (match_operand:SI 0 "low_register_operand" "=r") +(mem:SI (post_inc:SI (reg:SI SP_REGNUM] + "TARGET_THUMB2 && (reload_in_progress || reload_completed)" + "pop\t{%0}" + [(set_attr "type" "load1") + (set_attr "length" "2") + (set_attr "predicable" "yes")] +) + ;; We have two alternatives here for memory loads (and similarly for stores) ;; to reflect the fact that the permissible constant pool ranges differ ;; between ldr instructions taking low regs and ldr instructions taking high This is OK thanks. Please CC me on ARM specific patches, this one somehow seems to have missed my filters. Ramana
Re: shift/extract SHIFT_COUNT_TRUNCATED combine bug
On Mon, Jan 12, 2015 at 11:12 PM, Jeff Law wrote: > On 04/08/14 14:07, Mike Stump wrote: >> >> Something broke in the compiler to cause combine to incorrectly optimize: >> >> (insn 12 11 13 3 (set (reg:SI 604 [ D.6102 ]) >> (lshiftrt:SI (subreg/s/u:SI (reg/v:DI 601 [ x ]) 0) >> (reg:SI 602 [ D.6103 ]))) t.c:47 4436 {lshrsi3} >> (expr_list:REG_DEAD (reg:SI 602 [ D.6103 ]) >> (nil))) >> (insn 13 12 14 3 (set (reg:SI 605) >> (and:SI (reg:SI 604 [ D.6102 ]) >> (const_int 1 [0x1]))) t.c:47 3658 {andsi3} >> (expr_list:REG_DEAD (reg:SI 604 [ D.6102 ]) >> (nil))) >> (insn 14 13 15 3 (set (reg:DI 599 [ D.6102 ]) >> (zero_extend:DI (reg:SI 605))) t.c:47 4616 {zero_extendsidi2} >> (expr_list:REG_DEAD (reg:SI 605) >> (nil))) >> >> into: >> >> (insn 11 10 12 3 (set (reg:SI 602 [ D.6103 ]) >> (not:SI (subreg:SI (reg:DI 595 [ D.6102 ]) 0))) t.c:47 3732 >> {one_cmplsi2} >> (expr_list:REG_DEAD (reg:DI 595 [ D.6102 ]) >> (nil))) >> (note 12 11 13 3 NOTE_INSN_DELETED) >> (note 13 12 14 3 NOTE_INSN_DELETED) >> (insn 14 13 15 3 (set (reg:DI 599 [ D.6102 ]) >> (zero_extract:DI (reg/v:DI 601 [ x ]) >> (const_int 1 [0x1]) >> (reg:SI 602 [ D.6103 ]))) t.c:47 4668 {c2_extzvdi} >> (expr_list:REG_DEAD (reg:SI 602 [ D.6103 ]) >> (nil))) >> >> This shows up in: >> >>FAIL: gcc.c-torture/execute/builtin-bitops-1.c execution, -Og -g >> >> for me. >> >> diff --git a/gcc/combine.c b/gcc/combine.c >> index 708691f..c1f50ff 100644 >> --- a/gcc/combine.c >> +++ b/gcc/combine.c >> @@ -7245,6 +7245,18 @@ make_extraction (enum machine_mode mode, rtx inner, >> HOST_WIDE_INT pos, >> extraction_mode = insn.field_mode; >> } >> >> + /* On a SHIFT_COUNT_TRUNCATED machine, we can't promote the mode of >> + the extract to a larger size on a variable extract, as previously >> + the position might have been optimized to change a bit of the >> + index of the starting bit that would have been ignored before, >> + but, with a larger mode, will then not be. 
If we wanted to do >> + this, we'd have to mask out those bits or prove that those bits >> + are 0. */ >> + if (SHIFT_COUNT_TRUNCATED >> + && pos_rtx >> + && GET_MODE_BITSIZE (extraction_mode) > GET_MODE_BITSIZE (mode)) >> +extraction_mode = mode; >> + >> /* Never narrow an object, since that might not be safe. */ >> >> if (mode != VOIDmode >> >> is sufficient to never widen variable extracts on SHIFT_COUNT_TRUNCATED >> machines. So, the question is, how did people expect this to work? I >> didn’t spot what changed recently to cause the bad code-gen. The >> optimization of sub into not is ok, despite how funny it looks, because is >> feeds into extract which we know by SHIFT_COUNT_TRUNCATED is safe. >> >> Is the patch a reasonable way to fix this? > > On a SHIFT_COUNT_TRUNCATED target, I don't think it's ever OK to widen a > shift, variable or constant. > > In the case of a variable shift, we could easily have eliminated the masking > code before or during combine. For a constant shift amount we could have > adjusted the constant (see SHIFT_COUNT_TRUNCATED in cse.c) > > I think it's just an oversight and it has simply never bit us before. IMHO SHIFT_COUNT_TRUNCATED should be removed and instead backends should provide shift patterns with a (and:QI ...) for the shift amount which simply will omit that operation if suitable. Richard. > jeff
Re: [PATCH] Fix PR64436: broken logic to process bitwise ORs in bswap pass
On Mon, 12 Jan 2015, Thomas Preud'homme wrote: > Hi all, > > To identify if a set of loads, shift, cast, mask (bitwise and) and bitwise OR > is equivalent to a load or byteswap, the bswap pass assigns a number to each > byte loaded according to its significance (1 for lsb, 2 for next least > significant byte, etc.) and forms a symbolic number such as 0x04030201 for a > 32bit load. When processing a bitwise OR of two such symbolic numbers, it is > necessary to consider the lowest and highest addresses where a byte was > loaded to renumber each byte accordingly. For instance if the two numbers are > 0x04030201 and they were loaded from consecutive words in memory the result > would be 0x0807060504030201 but if they overlap fully the result would be > 0x04030201. > > Currently the computation of the byte with highest address is broken: it > takes the byte with highest address of the symbolic number that starts > last. That is, if one number represents an 8bit load at address 0x14 and > another number represents a 32bit load at address 0x12 it will compute > the end as 0x14 instead of 0x15. This error affects the computation of > the size of the load for all targets and the computation of the symbolic > number that results from the bitwise OR for big endian targets. This is > what causes PR64436 due to a change in the gimple generated for that > testcase. > > ChangeLog entry is as follows: Ok. Thanks, Richard. > gcc/ChangeLog > > 2014-12-30 Thomas Preud'homme thomas.preudho...@arm.com > > PR tree-optimization/64436 > * tree-ssa-math-opts.c (find_bswap_or_nop_1): Move code performing the > merge of two symbolic numbers for a bitwise OR to ... > (perform_symbolic_merge): This. Also fix computation of the range and > end of the symbolic number corresponding to the result of a bitwise OR. 
> > diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c > index 1ed2838..286183a 100644 > --- a/gcc/tree-ssa-math-opts.c > +++ b/gcc/tree-ssa-math-opts.c > @@ -1816,6 +1816,123 @@ find_bswap_or_nop_load (gimple stmt, tree ref, struct > symbolic_number *n) >return true; > } > > +/* Compute the symbolic number N representing the result of a bitwise OR on 2 > + symbolic number N1 and N2 whose source statements are respectively > + SOURCE_STMT1 and SOURCE_STMT2. */ > + > +static gimple > +perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1, > + gimple source_stmt2, struct symbolic_number *n2, > + struct symbolic_number *n) > +{ > + int i, size; > + uint64_t mask; > + gimple source_stmt; > + struct symbolic_number *n_start; > + > + /* Sources are different, cancel bswap if they are not memory location with > + the same base (array, structure, ...). */ > + if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2)) > +{ > + int64_t inc; > + HOST_WIDE_INT start_sub, end_sub, end1, end2, end; > + struct symbolic_number *toinc_n_ptr, *n_end; > + > + if (!n1->base_addr || !n2->base_addr > + || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) > + return NULL; > + > + if (!n1->offset != !n2->offset || > + (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) > + return NULL; > + > + if (n1->bytepos < n2->bytepos) > + { > + n_start = n1; > + start_sub = n2->bytepos - n1->bytepos; > + source_stmt = source_stmt1; > + } > + else > + { > + n_start = n2; > + start_sub = n1->bytepos - n2->bytepos; > + source_stmt = source_stmt2; > + } > + > + /* Find the highest address at which a load is performed and > + compute related info. */ > + end1 = n1->bytepos + (n1->range - 1); > + end2 = n2->bytepos + (n2->range - 1); > + if (end1 < end2) > + { > + end = end2; > + end_sub = end2 - end1; > + } > + else > + { > + end = end1; > + end_sub = end1 - end2; > + } > + n_end = (end2 > end1) ? 
n2 : n1; > + > + /* Find symbolic number whose lsb is the most significant. */ > + if (BYTES_BIG_ENDIAN) > + toinc_n_ptr = (n_end == n1) ? n2 : n1; > + else > + toinc_n_ptr = (n_start == n1) ? n2 : n1; > + > + n->range = end - n_start->bytepos + 1; > + > + /* Check that the range of memory covered can be represented by > + a symbolic number. */ > + if (n->range > 64 / BITS_PER_MARKER) > + return NULL; > + > + /* Reinterpret byte marks in symbolic number holding the value of > + bigger weight according to target endianness. */ > + inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; > + size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; > + for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) > + { > + unsigned marker = > + (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; > + if (marker && marker != MARKER_BYTE_UNKNOWN) > +
Re: flatten expr.h (version 2)
On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: > Hi, > This is a revamped expr.h flattening flattening patch rebased on > tree.h and tree-core.h flattening patch (r219402). > It depends upon the following patch to get committed. > https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html > > Changes: > * Removed all includes except tree-core.h. Put includes required by > expr.h in a comment. > * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. > * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, > genoutput.c. > * Did not put includes in gcc-plugin.h since expr.h cannot be included > by plugins > (putting them broke building a file in c-family/ since expr.h is not > allowed in front-ends) > * Affects java front-end (expr.h is allowed in java front-end). > > Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: > all,go,ada,jit > Built on all targets in config-list.mk with languages: all, go. > OK to commit ? diff --git a/gcc/expr.c b/gcc/expr.c index fc22862..824541e 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see #include "regs.h" #include "hard-reg-set.h" #include "except.h" -#include "input.h" #include "function.h" #include "insn-config.h" #include "insn-attr.h" /* Include expr.h after insn-config.h so we get HAVE_conditional_move. */ +#include "hashtab.h" +#include "emit-rtl.h" +#include "expmed.h" +#include "stmt.h" +#include "statistics.h" +#include "real.h" +#include "fixed-value.h" #include "expr.h" Please move the comment to the proper place diff --git a/gcc/expr.h b/gcc/expr.h index a7638b8..f1be8dc 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_EXPR_H #define GCC_EXPR_H -/* For inhibit_defer_pop */ +/* expr.h required includes */ +#if 0 #include "hashtab.h" #include "hash-set.h" #include "vec.h" @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. 
If not see #include "hard-reg-set.h" #include "input.h" #include "function.h" -/* For XEXP, GEN_INT, rtx_code */ #include "rtl.h" -/* For optimize_size */ #include "flags.h" -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ #include "tree-core.h" -/* For GET_MODE_BITSIZE, word_mode */ #include "insn-config.h" +#include "alias.h" +#include "emit-rtl.h" +#include "expmed.h" +#include "stmt.h" +#endif Err, please remove the #if 0 section + +#include "tree-core.h" Why? The original comment says -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ but all those are declared in tree.h. Which means the files including expr.h must already include tree.h. If that's not the reason we need to include tree-core.h from expr.c please add a comment explaining why. -/* Definitions from emit-rtl.c */ -#include "emit-rtl.h" - /* Return a memory reference like MEMREF, but with its mode widened to MODE and adjusted by OFFSET. */ extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); err - functions defined in emit-rtl.c should be declared in emit-rtl.h. Please fix that first. expr.h should _only_ contain prototypes for stuff defined in expr.c. Andrew did a good job with this, first cleaning up a header moving declarations to proper places and only after that flattening it. The rest of the patch looks good to me but expr.h isn't in a good shape after it. Thanks, Richard.
Re: [PATCH][ARM] FreeBSD ARM support, EABI, v3
On Thu, Jan 8, 2015 at 8:51 PM, Andreas Tobler wrote: > On 08.01.15 17:27, Richard Earnshaw wrote: >> >> On 29/12/14 18:44, Andreas Tobler wrote: >>> >>> All, >>> >>> here is the third attempt to support ARM with FreeBSD. >>> >>> In the meantime we found another issue in the unwinder where I had to >>> adapt some stuff. >>> >>> The unwind_phase2_forced function in libgcc calls a stop_fn function. >>> This stop_fn is in FreeBSD's libthr implementation and is called >>> thread_unwind_stop. This thread_unwind_stop is a generic function used >>> on all FreeBSD archs. >>> >>> The issue is now that this thread_unwind_stop expects a double int for >>> the exception_class, like on every other arch. For ARM EABI this >>> exception_class is an array of char which is passed in one register as >>> pointer vs. two registers for a double int. >>> >>> To solve this issue we defined the exception_class as double integer for >>> FreeBSD. My apologies for the slow response, some other work and then holidays intervened. From my understanding of the ABI document the implementation is currently as mandated by the ABI. Also this isn't a part of the ABI that's available for the platform (here FreeBSD) to manipulate and change as per its wishes. ARM EHABI is special for software, making FreeBSD more "special" for ARM appears to be counterintuitive from my point of view. A number of exception unwinding libraries, e.g. libobjc and libstdc++, all use this implementation of exception_class. Therefore this creates a divergence for the FreeBSD port which is different from everything else. I expect that a number of language run time support libraries that supported the ARM EHABI would be using such an implementation, therefore you need to fix every single implementation of this in every unwinder that supports the ARM EHABI which I expect to have been ported to in a number of libraries already.
(I already see this in libobjc and libstdc++ in the GCC tree) I would rather fix the thread_unwind_stop implementation in libthr for ARM EHABI rather than make this change. >>> >>> This adaptation reduced the failure count in libstdc++ by about 40 fails. >>> >>> I build and test this port on a regular basis and I post the results to >>> the usual place. Thanks for doing this. I'm really glad that FreeBSD is finally moving to EABI. regards Ramana > > > ... > >> Umm, sorry, just seen this update to the previous patch. >> >> The changes to the exception unwinding look a bit more involved. Could >> you separate that out into a separate patch, so that it's easier to see >> what you're changing? > > > Ok, here the mentioned part as separate diff. The comments are above. The CL > below :) > > Thank you very much! > > Andreas > > gcc: > > * ginclude/unwind-arm-common.h (_Unwind_Control_Block): Define > exception_class as double integer for FreeBSD ARM. > (_Unwind_Exception): Define _Unwind_Exception_Class as double > integer > for FreeBSD ARM. > > libstdc++-v3: > > > * libsupc++/unwind-cxx.h (__is_gxx_exception_class, > __is_dependent_exception): Exclude FreeBSD ARM from the > __ARM_EABI_UNWINDER__ ifdef. > >
Re: flatten expr.h (version 2)
On 13 January 2015 at 15:34, Richard Biener wrote: > On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: > >> Hi, >> This is a revamped expr.h flattening flattening patch rebased on >> tree.h and tree-core.h flattening patch (r219402). >> It depends upon the following patch to get committed. >> https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html >> >> Changes: >> * Removed all includes except tree-core.h. Put includes required by >> expr.h in a comment. >> * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. >> * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, >> genoutput.c. >> * Did not put includes in gcc-plugin.h since expr.h cannot be included >> by plugins >> (putting them broke building a file in c-family/ since expr.h is not >> allowed in front-ends) >> * Affects java front-end (expr.h is allowed in java front-end). >> >> Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: >> all,go,ada,jit >> Built on all targets in config-list.mk with languages: all, go. >> OK to commit ? > > diff --git a/gcc/expr.c b/gcc/expr.c > index fc22862..824541e 100644 > --- a/gcc/expr.c > +++ b/gcc/expr.c > @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see > #include "regs.h" > #include "hard-reg-set.h" > #include "except.h" > -#include "input.h" > #include "function.h" > #include "insn-config.h" > #include "insn-attr.h" > /* Include expr.h after insn-config.h so we get HAVE_conditional_move. > */ > +#include "hashtab.h" > +#include "emit-rtl.h" > +#include "expmed.h" > +#include "stmt.h" > +#include "statistics.h" > +#include "real.h" > +#include "fixed-value.h" > #include "expr.h" > > Please move the comment to the proper place ah, my flattening tool doesn't look at comments. I will move the comment before expr.h include, thanks. > > diff --git a/gcc/expr.h b/gcc/expr.h > index a7638b8..f1be8dc 100644 > --- a/gcc/expr.h > +++ b/gcc/expr.h > @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. 
If not see > #ifndef GCC_EXPR_H > #define GCC_EXPR_H > > -/* For inhibit_defer_pop */ > +/* expr.h required includes */ > +#if 0 > #include "hashtab.h" > #include "hash-set.h" > #include "vec.h" > @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. If not see > #include "hard-reg-set.h" > #include "input.h" > #include "function.h" > -/* For XEXP, GEN_INT, rtx_code */ > #include "rtl.h" > -/* For optimize_size */ > #include "flags.h" > -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, > - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ > #include "tree-core.h" > -/* For GET_MODE_BITSIZE, word_mode */ > #include "insn-config.h" > +#include "alias.h" > +#include "emit-rtl.h" > +#include "expmed.h" > +#include "stmt.h" > +#endif > > Err, please remove the #if 0 section I kept it because if something breaks later (hopefully not!), it will be easier to fix. I will remove it. > > + > +#include "tree-core.h" > > Why? The original comment says > > -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, > - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ > > but all those are declared in tree.h. Which means the files including > expr.h must already include tree.h. > > If that's not the reason we need to include tree-core.h from expr.c > please add a comment explaining why. bt-load.c fails to compile because it includes expr.h but does not include tree.h I will place tree.h include in all files that include expr.h and rebuild. > > -/* Definitions from emit-rtl.c */ > -#include "emit-rtl.h" > - > /* Return a memory reference like MEMREF, but with its mode widened to > MODE and adjusted by OFFSET. */ > extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); > > err - functions defined in emit-rtl.c should be declared in emit-rtl.h. > Please fix that first. expr.h should _only_ contain prototypes > for stuff defined in expr.c. 
oops, missed it :( > > Andrew did a good job with this, first cleaning up a header moving > declarations to proper places and only after that flattening it. > > The rest of the patch looks good to me but expr.h isn't in a good > shape after it. I will work on it and send patch with suggested changes by tomorrow. Thanks, Prathamesh > > Thanks, > Richard.
Re: [Patch, AArch64, Testsuite] Check for expected MOVI vectorization.
On 9 January 2015 at 16:31, Tejas Belagod wrote: > gcc/testsuite: > > * gcc.target/aarch64/vect-movi.c: Check for vectorization for > 64-bit and 128-bit. OK /Marcus
[AArch64] Allow stack pointer as first input to a subtraction
Several sub-based patterns allowed the stack pointer to be the destination but not the first source. This looked like an oversight; in all the patterns changed here (but not for example in *sub_mul_imm_), the instruction allows the stack pointer to appear in both positions. Tested on aarch64-linux-gnu. OK to install? Thanks, Richard gcc/ * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) (*sub__, *sub__si_uxtw) (*sub__shft_) (*sub__shft_si_uxtw, *sub__multp2) (*sub_si_multp2_uxtw, *sub_uxt_multp2) (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. gcc/testsuite/ * gcc.target/aarch64/subsp.c: New test. Index: gcc/config/aarch64/aarch64.md === --- gcc/config/aarch64/aarch64.md 2015-01-13 09:48:26.901649982 + +++ gcc/config/aarch64/aarch64.md 2015-01-13 09:48:26.897650031 + @@ -1889,8 +1889,8 @@ (define_insn "*add_uxtsi_multp2_uxtw" (define_insn "subsi3" [(set (match_operand:SI 0 "register_operand" "=rk") - (minus:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "register_operand" "r")))] + (minus:SI (match_operand:SI 1 "register_operand" "rk") + (match_operand:SI 2 "register_operand" "r")))] "" "sub\\t%w0, %w1, %w2" [(set_attr "type" "alu_sreg")] @@ -1900,7 +1900,7 @@ (define_insn "subsi3" (define_insn "*subsi3_uxtw" [(set (match_operand:DI 0 "register_operand" "=rk") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "r") + (minus:SI (match_operand:SI 1 "register_operand" "rk") (match_operand:SI 2 "register_operand" "r"] "" "sub\\t%w0, %w1, %w2" @@ -1909,8 +1909,8 @@ (define_insn "*subsi3_uxtw" (define_insn "subdi3" [(set (match_operand:DI 0 "register_operand" "=rk,w") - (minus:DI (match_operand:DI 1 "register_operand" "r,w") - (match_operand:DI 2 "register_operand" "r,w")))] + (minus:DI (match_operand:DI 1 "register_operand" "rk,w") + (match_operand:DI 2 "register_operand" "r,w")))] "" "@ sub\\t%x0, %x1, %x2 @@ -2013,7 +2013,7 @@ (define_insn "*sub_mul_imm_si_uxtw" (define_insn "*sub__" [(set (match_operand:GPI 0 
"register_operand" "=rk") - (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r"] "" @@ -2025,7 +2025,7 @@ (define_insn "*sub___si_uxtw" [(set (match_operand:DI 0 "register_operand" "=rk") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "r") + (minus:SI (match_operand:SI 1 "register_operand" "rk") (ANY_EXTEND:SI (match_operand:SHORT 2 "register_operand" "r")] "" @@ -2035,7 +2035,7 @@ (define_insn "*sub__s (define_insn "*sub__shft_" [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (ashift:GPI (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r")) (match_operand 3 "aarch64_imm3" "Ui3"] @@ -2048,7 +2048,7 @@ (define_insn "*sub__sh (define_insn "*sub__shft_si_uxtw" [(set (match_operand:DI 0 "register_operand" "=rk") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "r") + (minus:SI (match_operand:SI 1 "register_operand" "rk") (ashift:SI (ANY_EXTEND:SI (match_operand:SHORT 2 "register_operand" "r")) (match_operand 3 "aarch64_imm3" "Ui3")] @@ -2059,7 +2059,7 @@ (define_insn "*sub__s (define_insn "*sub__multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (minus:GPI (match_operand:GPI 4 "register_operand" "rk") (ANY_EXTRACT:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand 2 "aarch64_pwr_imm3" "Up3")) @@ -2074,7 +2074,7 @@ (define_insn "*sub__multp2" (define_insn "*sub_si_multp2_uxtw" [(set (match_operand:DI 0 "register_operand" "=rk") (zero_extend:DI - (minus:SI (match_operand:SI 4 "register_operand" "r") + (minus:SI (match_operand:SI 4 "register_operand" "rk") (ANY_EXTRACT:SI (mult:SI (match_operand:SI 1 "register_operand" "r") (match_operand 2 "aarch64_pwr_imm3" "Up3")) @@ -2113,7 +2113,7 @@ 
(define_insn "*subsi3_carryin_uxtw" (define_insn "*sub_uxt_multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (mat
Re: [gomp4] Replace enum omp_clause_map_kind with enum gomp_map_kind (was: Including a file from include/ in gcc/*.h)
Hi! On Mon, 12 Jan 2015 17:39:16 +0100, Jakub Jelinek wrote: > On Mon, Jan 12, 2015 at 05:32:14PM +0100, Thomas Schwinge wrote: > > I have now committed the patch to gomp-4_0-branch in the following form. > > The issues raised above remain to be resolved. (I'll try to address those later on.) > > In spirit against the tree.h header flattening, I had to keep the > > #include "include/gomp-constants.h" in gcc/tree-core.h, because otherwise > > I'd have to add it to a ton of *.c files, just for the enum gomp_map_kind > > definition. > > > > I found that in the C++ dialect used by GCC, it is not possible to > > declare an enum without giving the list of enumerators. N2764 (from > > 2008) resolved this by adding appropriate syntax for declaring enums, > > however: "warning: scoped enums only available with -std=c++11 or > > -std=gnu++11". If it were possible to use this, we could add to > > gcc/tree-core.h: > > > > enum gomp_map_kind : char; > > > > ... (or similar), and this way decouple the declaration (gcc/tree-core.h) > > From the actual "population of it" (include/gomp-constants.h). > > Alternatively, in gcc/tree-core.h:struct tree_omp_clause, we could switch > > the map_kind member from enum gomp_map_kind to a char -- but that would > > defeat the usage of an enum (easy pretty-printing of its enumerators in > > GDB, and so on.). > > Or just don't do this and duplicate the constants and just assert somewhere > (in omp-low.c) at compile time that all the values match. > Either using char and casting the value only in the OMP_* macros > or duplicating the values sound preferrable over including > include/gomp-constants.h from tree-core.h. Indeed I've found precedent in gcc/tree.h: there already are a few *_SET_* functions, also used for casting to/from enum types. Committed to gomp-4_0-branch in r219524: commit 7dbb7ec6c08d604926fca30e105d2b6411cf73cb Author: tschwinge Date: Tue Jan 13 10:50:01 2015 + Avoid inclusion of "gomp-constants.h" in gcc/tree-core.h. 
N2764 (from 2008) added syntax for declaring enums, however: "warning: scoped enums only available with -std=c++11 or -std=gnu++11": in the C++ dialect currently used by GCC, it is not possible to declare an enum without giving the full list of enumerators. If it were possible to use this, we could add to gcc/tree-core.h: enum gomp_map_kind : unsigned char; ..., and keep using enum gomp_map_kind for gcc/tree-core.h's struct tree_omp_clause's map_kind member, and this way decouple the declaration (gcc/tree-core.h) from the actual "population of it" (include/gomp-constants.h). Until switching GCC to C++11, we'll have to do as follows: gcc/ * tree-core.h: Don't include "gomp-constants.h". (struct tree_omp_clause): Change type of map_kind member from enum gomp_map_kind to unsigned char. * tree.h (OMP_CLAUSE_MAP_KIND): Cast it to enum gomp_map_kind. (OMP_CLAUSE_SET_MAP_KIND): New macro. * gimplify.c (gimplify_adjust_omp_clauses_1) (gimplify_adjust_omp_clauses): Use OMP_CLAUSE_SET_MAP_KIND. * omp-low.c (oacc_initialize_reduction_data): Likewise. * tree-nested.c (convert_nonlocal_reference_stmt) (convert_local_reference_stmt, convert_gimple_call): Likewise. * tree-streamer-in.c (unpack_ts_omp_clause_value_fields): Likewise. gcc/c/ * c-parser.c (c_parser_oacc_data_clause) (c_parser_oacc_data_clause_deviceptr, c_parser_omp_clause_map): Use OMP_CLAUSE_SET_MAP_KIND. * c-typeck.c (handle_omp_array_sections): Likewise. gcc/cp/ * parser.c (cp_parser_oacc_data_clause) (cp_parser_oacc_data_clause_deviceptr, cp_parser_omp_clause_map): Use OMP_CLAUSE_SET_MAP_KIND. * semantics.c (handle_omp_array_sections): Likewise. gcc/fortran/ * trans-openmp.c (gfc_omp_finish_clause, gfc_trans_omp_clauses): Use OMP_CLAUSE_SET_MAP_KIND. gcc/ * lto-streamer-out.c: Include "gomp-constants.h". * tree-streamer-in.c: Likewise. * tree-streamer-out.c: Likewise. gcc/lto/ * lto.c: Include "gomp-constants.h". 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@219524 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 19 +++ gcc/c/ChangeLog.gomp | 7 +++ gcc/c/c-parser.c | 6 +++--- gcc/c/c-typeck.c | 2 +- gcc/cp/ChangeLog.gomp | 7 +++ gcc/cp/parser.c| 6 +++--- gcc/cp/semantics.c | 4 ++-- gcc/fortran/trans-openmp.c | 46 +++--- gcc/gimplify.c | 11 ++- gcc/lto-streamer-out.c | 1 + gcc/lto/ChangeLog.gomp | 4 gcc/lto/lto.c | 1 + gcc/omp-low.c | 2 +- gcc/tree-core.h| 5 ++--- gcc/tree-nested.c
Re: [AARCH64][PR63424][4.9]Backport "Fix PR63424 by adding v2di3 pattern"
On 7 January 2015 at 14:01, Renlin Li wrote: > Is it Okay for branch 4.9? > > gcc/ChangeLog: > > 2014-11-19 Renlin Li > PR target/63424 > * config/aarch64/aarch64-simd.md (v2di3): New. > > gcc/testsuite/ChangeLog: > > 2014-11-19 Renlin Li > PR target/63424 > * gcc.target/aarch64/pr63424.c: New Test. > OK /Marcus
Re: [PATCH] Fix PR64461, Incorrect code on coldfire targets
Jeff Law writes: > For "fun" I've got an m68k bootstrap of the trunk running. I don't expect > it to finish for at least a week or so, assuming it runs to completion. The last time I did that it took about 10 days (with all languages enabled, running in Aranym on a moderately fast host). Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
Re: [Fortran, Patch] PR60334 - Segmentation fault on character pointer assignments
Hi Paul, thanks for the reviewed and the valued comments. Just for completeness I have attached the patch with the changes requested. Bootstraps and regtests ok on x86_64-linux-gnu. Regards, Andre On Mon, 12 Jan 2015 22:07:29 +0100 Paul Richard Thomas wrote: > Hi Andre, > > + if (INDIRECT_REF_P (parmse.string_length)) > +/* In chains of functions/procedure calls the string_length already > + is a pointer to the variable holding the length. Therefore > + remove the deref on call. */ > +parmse.string_length = TREE_OPERAND (parmse.string_length, 0); > > This is OK but I would use instead: > + if (POINTER_TYPE_P (parmse.string_length)) > +/* In chains of functions/procedure calls the string_length already > + is a pointer to the variable holding the length. Therefore > + remove the deref on call. */ > +parmse.string_length = build_fold_indirect_ref > (parmse.string_length); > > If you look in ~/gcc/fold-const.c:15751, you will see that > TREE_OPERAND (parmse.string_length, 0) but that it is preceded by > cleaning up of NOOPS and, in any case, its usage will preserve the > standard API just in case the internals change :-) > > of course, using TREE_OPERAND (xxx, 0) in the various fortran class > functions makes such an assumption ;-) > > Apart from that, the patch is fine. > > I'll have a session of doing some commits later this week and will do > this patch at that time. > > Cheers > > Paul > > On 11 January 2015 at 16:21, Andre Vehreschild wrote: > > Hi Paul, > > > > thanks for the review. I do not have commits rights. > > > > Unfortunately is the patch not ok. I figured today, that it needs an > > extension when function calls that return deferred char len arrays are > > nested. In this special case the string length would have been lost. The > > attached extended version fixes this issue. > > > > Sorry for the duplicate work. > > > > Bootstraps and regtests ok on x86_64-linux-gnu. 
> > > > Regards, > > Andre > > > > On Sun, 11 Jan 2015 16:11:10 +0100 > > Paul Richard Thomas wrote: > > > >> Dear Andre, > >> > >> This is OK for trunk. I have not been keeping track of whether or not > >> you have commit rights yet. If not, I will get to it sometime this > >> week. > >> > >> Thanks for the patch. > >> > >> Paul > >> > >> On 10 January 2015 at 15:59, Andre Vehreschild wrote: > >> > Hi all, > >> > > >> > attached patch fixes the bug reported in pr 60334. The issue here was > >> > that the function's result being (a pointer to) a deferred length char > >> > array. The string length for the result value was wrapped in a local > >> > variable, whose value was never written back to the string length of the > >> > result. This lead the calling routine to take the length of the result > >> > to be random leading to a crash. > >> > > >> > This patch addresses the issue by preventing the instantiation of the > >> > local var and instead using a reference to the parameter. This not only > >> > saves one value on the stack, but also because for small functions the > >> > compiler will hold all parameters in registers for a significant level > >> > of optimization, all the overhead of memory access (I hope :-). > >> > > >> > Bootstraps and regtests ok on x86_64-linux-gnu. > >> > > >> > - Andre > >> > -- > >> > Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen > >> > Tel.: +49 241 9291018 * Email: ve...@gmx.de > >> > >> > >> > > > > > > -- > > Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen > > Tel.: +49 241 9291018 * Email: ve...@gmx.de > > > -- Andre Vehreschild * Kreuzherrenstr. 
8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de pr60334_3.clog Description: Binary data diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 1e74125..86873f7 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1333,12 +1333,30 @@ gfc_get_symbol_decl (gfc_symbol * sym) (sym->ts.u.cl->passed_length == sym->ts.u.cl->backend_decl)) sym->ts.u.cl->backend_decl = NULL_TREE; - if (sym->ts.deferred && fun_or_res - && sym->ts.u.cl->passed_length == NULL - && sym->ts.u.cl->backend_decl) + if (sym->ts.deferred && byref) { - sym->ts.u.cl->passed_length = sym->ts.u.cl->backend_decl; - sym->ts.u.cl->backend_decl = NULL_TREE; + /* The string length of a deferred char array is stored in the + parameter at sym->ts.u.cl->backend_decl as a reference and + marked as a result. Exempt this variable from generating a + temporary for it. */ + if (sym->attr.result) + { + /* We need to insert a indirect ref for param decls. */ + if (sym->ts.u.cl->backend_decl + && TREE_CODE (sym->ts.u.cl->backend_decl) == PARM_DECL) + sym->ts.u.cl->backend_decl = + build_fold_indirect_ref (sym->ts.u.cl->backend_decl); + } + /* For all other parameters make sure, that they are copied so + that the value and any
Re: [PATCH] Fix up computed goto on POINTERS_EXTEND_UNSIGNED targets (PR middle-end/63974)
On Mon, Jan 12, 2015 at 12:19 PM, Jakub Jelinek wrote: > Hi! > > The 991213-3.c testcase ICEs on aarch64-linux with -mabi=ilp32 > since wide-int merge. The problem is that > x = convert_memory_address (Pmode, x) > is used twice on a VOIDmode CONST_INT, which is wrong. > For non-VOIDmode rtl the second convert_memory_address > is a NOP, but for VOIDmode the second call treats the CONST_INT > returned by the first call as if it was again ptr_mode, rather > than Pmode. On aarch64-linux in particular, the constant is > zero-extended from SImode to DImode in the first call, so it > is not valid SImode CONST_INT any longer. > > emit_indirect_jump always calls convert_memory_address (Pmode, ...) > on the operand in optabs.c when handling EXPAND_ADDRESS case > in maybe_legitimize_operand, so the first convert_memory_address > is both unnecessary and harmful. > > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux > (which do not define POINTERS_EXTEND_UNSIGNED) and tested on the > problematic testcase with aarch64-linux cross. Can anyone with > easy access to POINTERS_EXTEND_UNSIGNED targets (aarch64-linux ilp32, > x86_64 -mx32, ia64-hpux) please test this? > > Ok for trunk if it works there? > > 2015-01-12 Jakub Jelinek > > PR middle-end/63974 > * cfgexpand.c (expand_computed_goto): Don't call > convert_memory_address here. > > --- gcc/cfgexpand.c.jj 2015-01-09 21:59:54.0 +0100 > +++ gcc/cfgexpand.c 2015-01-12 14:41:35.210705174 +0100 > @@ -3060,8 +3060,6 @@ expand_computed_goto (tree exp) > { >rtx x = expand_normal (exp); > > - x = convert_memory_address (Pmode, x); > - >do_pending_stack_adjust (); >emit_indirect_jump (x); > } > No regressions on x32. -- H.J.
Re: [PATCH/AARCH64] Disable load/store pair peephole for volatile mem
On 10 December 2014 at 02:18, Andrew Pinski wrote: > Hi, > As mentioned in > https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the > load/store pair peepholes currently accept volatile mem which can > cause wrong code as the architecture does not define which part of the > pair happens first. > > This patch disables the peephole for volatile mem and adds two > testcases so that volatile loads are not converted into load pair (I > could add the same for store pair if needed). In the second testcase, > only f3 does not get converted to load pair, even though the order of > the loads are different. > > OK? Bootstrapped and tested on aarch64-linux-gnu without any regressions. > > Thanks, > Andrew Pinski > > ChangeLog: > * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject > volatile mems. > (aarch64_operands_adjust_ok_for_ldpstp): Likewise. > > testsuite/ChangeLog: > * gcc.target/aarch64/volatileloadpair-1.c: New testcase. > * gcc.target/aarch64/volatileloadpair-2.c: New testcase. OK. Bin, Feel free to follow up with a patch to reorg the MEM_P /Marcus
Re: [AArch64] Allow stack pointer as first input to a subtraction
On 13 January 2015 at 10:47, Richard Sandiford wrote: > Several sub-based patterns allowed the stack pointer to be the destination > but not the first source. This looked like an oversight; in all the patterns > changed here (but not for example in *sub_mul_imm_), the instruction > allows the stack pointer to appear in both positions. > > Tested on aarch64-linux-gnu. OK to install? > > Thanks, > Richard > > > gcc/ > * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) > (*sub__, *sub__si_uxtw) > (*sub__shft_) > (*sub__shft_si_uxtw, *sub__multp2) > (*sub_si_multp2_uxtw, *sub_uxt_multp2) > (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. > > gcc/testsuite/ > * gcc.target/aarch64/subsp.c: New test. OK /Marcus
Re: [PATCH/AARCH64] Correctly handle stores of zero in fusion_load_store
On 13 January 2015 at 04:48, Andrew Pinski wrote: > ChangeLog: > * config/aarch64/aarch64.c (fusion_load_store): Check dest mode > instead of src mode. > > > * gcc.target/aarch64/store-pair-1.c: New testcase. OK, thanks /Marcus
Re: [PATCH] Fix PR64404
On Mon, 12 Jan 2015, Richard Biener wrote: > > I am testing the following patch to fix a latent bug in the vectorizer > dealing with redundant DRs. > > Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Which shows the patch is bogus. Instead we are not prepared to handle this situation. Thus the following patch rejects it, making the testcase a runtime one as well. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-01-13 Richard Biener PR tree-optimization/64404 * tree-vect-stmts.c (vectorizable_load): Reject conflicting SLP types for CSEd loads. * gcc.dg/vect/pr64404.c: New testcase. Index: gcc/tree-vect-stmts.c === --- gcc/tree-vect-stmts.c (revision 219520) +++ gcc/tree-vect-stmts.c (working copy) @@ -5791,6 +5791,20 @@ vectorizable_load (gimple stmt, gimple_s "group loads with negative dependence distance\n"); return false; } + + /* Similarly when the stmt is a load that is both part of a SLP + instance and a loop vectorized stmt via the same-dr mechanism +we have to give up. 
*/ + if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) + && (STMT_SLP_TYPE (stmt_info) + != STMT_SLP_TYPE (vinfo_for_stmt +(STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +"conflicting SLP types for CSEd load\n"); + return false; + } } Index: gcc/testsuite/gcc.dg/vect/pr64404.c === --- gcc/testsuite/gcc.dg/vect/pr64404.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64404.c (working copy) @@ -0,0 +1,59 @@ +/* { dg-do run } */ +/* { dg-additional-options "--param=sccvn-max-alias-queries-per-access=1" } */ + +#include "tree-vect.h" + +extern void abort (void); + +typedef struct +{ + int l, h; +} tFPinterval; + +tFPinterval X[1024]; +tFPinterval Y[1024]; +tFPinterval Z[1024]; + +void __attribute__((noinline)) +Compute (void) +{ + int d; + for (d = 0; d < 1024; d++) +{ + Y[d].l = X[d].l + X[d].h; + Y[d].h = Y[d].l; + Z[d].l = X[d].l; + Z[d].h = X[d].h; +} +} + +int +main (void) +{ + int d; + + check_vect (); + + for (d = 0; d < 1024; d++) +{ + X[d].l = d; + X[d].h = d + 1; + __asm__ volatile (""); +} + + Compute (); + + for (d = 0; d < 1024; d++) +{ + if (Y[d].l != X[d].l + X[d].h +|| Y[d].h != Y[d].l +|| Z[d].l != X[d].l +|| Z[d].h != X[d].h) + abort (); + __asm__ volatile (""); +} + + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */
Re: [testsuite] PATCH: Add check_effective_target_pie
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: > On 01/12/15 14:51, Magnus Granberg wrote: > >måndag 12 januari 2015 12.11.17 skrev H.J. Lu: > >>On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law wrote: > >>>On 01/12/15 12:59, H.J. Lu wrote: > I don't know if -pg will work PIE on any targets. For Linux/x86 > the choices of crt1.o are > > %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} > > -shared, -pg and -pie are mutually exclusive. Those crt1 files are > only crt1 files provided by glibc. You can't even try -pg -pie on > Linux without changing glibc. > >>> > >>>You're totally missing the point. What I care about is *why*. > >>> > >With -pg it use gcrt1.o object file and that file is not compile with -fPIC. > >When you build a shared lib on x86_64 all the objects files need to be buiit > >with -fPIC else you get a error like that one abow and it is the same > >problems > >when you build bin with -fPIE and linke with -pie. > >Glibc do not provide one that is compile with -fPIC > Is there some reason why glibc could not provide gcrt1.o compiled with > -fPIC? That is a good question. We can compile gcrt1.o with -fPIC and it will work with both -pg and -pg -pie. I will open a glibc bug. Here is the updated patch without the check_profiling_available change. OK for trunk? Thanks. H.J. --- Subject: [PATCH 1/5] Add check_effective_target_pie Hi, This patch adds check_effective_target_pie to check if the current multilib generates PIE by default. Thanks. H.J. --- 2015-01-11 H.J. Lu * gcc.target/i386/pie.c: New test. * lib/target-supports.exp (check_effective_target_pie): New. 
--- gcc/testsuite/gcc.target/i386/pie.c | 12 gcc/testsuite/lib/target-supports.exp | 10 ++ 2 files changed, 22 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pie.c diff --git a/gcc/testsuite/gcc.target/i386/pie.c b/gcc/testsuite/gcc.target/i386/pie.c new file mode 100644 index 000..0a9f5ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pie.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target pie } } */ +/* { dg-options "-O2" } */ + +int foo (void); + +int +main (void) +{ + return foo (); +} + +/* { dg-final { scan-assembler "foo@PLT" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index f5c6db8..0ac9646 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1080,6 +1080,16 @@ proc check_effective_target_nonpic { } { }] } +# Return 1 if the current multilib generates PIE by default. + +proc check_effective_target_pie { } { +return [check_no_compiler_messages pie assembly { + #ifndef __PIE__ + #error unsupported + #endif +}] +} + # Return 1 if the target does not use a status wrapper. proc check_effective_target_unwrapped { } { -- 1.9.3
[testsuite] PATCH: Check if -pg available
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: > On 01/12/15 14:51, Magnus Granberg wrote: > >måndag 12 januari 2015 12.11.17 skrev H.J. Lu: > >>On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law wrote: > >>>On 01/12/15 12:59, H.J. Lu wrote: > I don't know if -pg will work PIE on any targets. For Linux/x86 > the choices of crt1.o are > > %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} > > -shared, -pg and -pie are mutually exclusive. Those crt1 files are > only crt1 files provided by glibc. You can't even try -pg -pie on > Linux without changing glibc. > >>> > >>>You're totally missing the point. What I care about is *why*. > >>> > >With -pg it use gcrt1.o object file and that file is not compile with -fPIC. > >When you build a shared lib on x86_64 all the objects files need to be buiit > >with -fPIC else you get a error like that one abow and it is the same > >problems > >when you build bin with -fPIE and linke with -pie. > >Glibc do not provide one that is compile with -fPIC > Is there some reason why glibc could not provide gcrt1.o compiled with > -fPIC? > > Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? Thanks. H.J. --- gcc/testsuite/lib/target-supports.exp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0ac9646..7c09399 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -546,6 +546,12 @@ proc check_profiling_available { test_what } { set profiling_available_saved 0 } else { set profiling_available_saved 1 + if { [check_no_compiler_messages_nocache profiling executable { + int main() { return 0; } } "-pg"] } { + set profiling_available_saved 1 +} else { + set profiling_available_saved 0 + } } } -- 1.9.3
Re: [PATCH]: New configure options that make the compiler use -fPIE and -pie as default option
On Mon, Jan 12, 2015 at 11:50:41PM +, Joseph Myers wrote: > On Mon, 12 Jan 2015, H.J. Lu wrote: > > > +if test x$enable_default_pie = xyes; then > > + AC_MSG_CHECKING(if $target supports default PIE) > > + enable_default_pie=no > > + case $target in > > +i?86*-*-linux* | x86_64*-*-linux*) > > + saved_LDFLAGS="$LDFLAGS" > > + saved_CFLAGS="$CFLAGS" > > + CFLAGS="$CFLAGS -fPIE" > > + LDFLAGS="$LDFLAGS -fPIE -pie" > > + AC_TRY_LINK(,,[enable_default_pie=yes],) > > + LDFLAGS="$saved_LDFLAGS" > > + CFLAGS="$saved_CFLAGS" > > + ;; > > +*) > > + ;; > > +esac > > There should not be any such hardcoding of targets here without concrete > evidence that the targets for which this sets enable_default_pie=no really > cannot support PIE. In particular, there is no reason at all for this to > be architecture-specific; all GNU/Linux architectures should support PIE. > > I believe AC_TRY_LINK here will test for the host, whereas what you want > to know is what's supported for the target (but it's not possible to run > link tests for the target at this point; the compiler for the target > hasn't even been built). > > So: just presume that if the user passes --enable-default-pie then they > know what they are doing, and don't try to override their choice. > > > diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi > > index c9e3bf1..89fc305 100644 > > --- a/gcc/doc/install.texi > > +++ b/gcc/doc/install.texi > > @@ -1583,6 +1583,10 @@ not be built. > > Specify that the run-time libraries for stack smashing protection > > should not be built. > > > > +@item --enable-default-pie > > +Turn on @option{-fPIE} and @option{-pie} by default if supported. > > +Currently supported targets are i?86-*-linux* and x86-64-*-linux*. > > The "if supported" and target list can then be removed here. > Here is the updated patch. To support --enable-default-pie, each target must update STARTFILE_SPEC to support PIE_SPEC and NO_PIE_SPEC. I can provide STARTFILE_SPEC patch if needed. Thanks. H.J. 
--- gcc/ 2015-01-12 Magnus Granberg H.J. Lu * Makefile.in (COMPILER): Add @NO_PIE_CFLAGS@. (LINKER): Add @NO_PIE_FLAG@. (libgcc.mvars): Set NO_PIE_CFLAGS to -fno-PIE for --enable-default-pie. * common.opt (fPIE): Initialize to -1. (fpie): Likewise. (static): Add "RejectNegative Negative(shared)". (no-pie): New option. (pie): Replace "Negative(shared)" with "Negative(no-pie)". * configure.ac: Add --enable-default-pie. (NO_PIE_CFLAGS): New. Check if -fno-PIE works. AC_SUBST. (NO_PIE_FLAG): New. Check if -no-pie works. AC_SUBST. * defaults.h (DEFAULT_FLAG_PIE): New. Default PIE to -fPIE. * gcc.c (NO_PIE_SPEC): New. (PIE_SPEC): Likewise. (LD_PIE_SPEC): Likewise. (LINK_PIE_SPEC): Handle -no-pie. Use PIE_SPEC and LD_PIE_SPEC. * opts.c (DEFAULT_FLAG_PIE): New. Set to 0 if ENABLE_DEFAULT_PIE is undefined. (finish_options): Update opts->x_flag_pie if it is -1. * config/gnu-user.h (FVTABLE_VERIFY_SPEC): New. (GNU_USER_TARGET_STARTFILE_SPEC): Use FVTABLE_VERIFY_SPEC. Use NO_PIE_SPEC and NO_PIE_SPEC if ENABLE_DEFAULT_PIE is defined. (GNU_USER_TARGET_STARTFILE_SPEC): Use FVTABLE_VERIFY_SPEC. * doc/install.texi: Document --enable-default-pie. * doc/invoke.texi: Document -no-pie. * config.in: Regenerated. * configure: Likewise. gcc/ada/ 2015-01-12 H.J. Lu * gcc-interface/Makefile.in (TOOLS_LIBS): Add @NO_PIE_FLAG@. libgcc/ 2015-01-12 H.J. Lu * Makefile.in (CRTSTUFF_CFLAGS): Add $(NO_PIE_CFLAGS). diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5f9261f..180751f 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -252,6 +252,12 @@ LINKER = $(CC) LINKER_FLAGS = $(CFLAGS) endif +# We don't want to compile the compiler with -fPIE, it make PCH fail. +COMPILER += @NO_PIE_CFLAGS@ + +# Link with -no-pie since we compile the compiler with -fno-PIE. +LINKER += @NO_PIE_FLAG@ + # Like LINKER, but use a mutex for serializing front end links. 
ifeq (@DO_LINK_MUTEX@,true) LLINKER = $(SHELL) $(srcdir)/lock-and-run.sh linkfe.lck $(LINKER) @@ -1854,6 +1860,12 @@ libgcc.mvars: config.status Makefile specs xgcc$(exeext) echo GCC_CFLAGS = '$(GCC_CFLAGS)' >> tmp-libgcc.mvars echo INHIBIT_LIBC_CFLAGS = '$(INHIBIT_LIBC_CFLAGS)' >> tmp-libgcc.mvars echo TARGET_SYSTEM_ROOT = '$(TARGET_SYSTEM_ROOT)' >> tmp-libgcc.mvars + if test @enable_default_pie@ = yes; then \ + NO_PIE_CFLAGS="-fno-PIE"; \ + else \ + NO_PIE_CFLAGS=; \ + fi; \ + echo NO_PIE_CFLAGS = "$$NO_PIE_CFLAGS" >> tmp-libgcc.mvars mv tmp-libgcc.mvars libgcc.mvars diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in index 870cfab..a446d48 100644 --- a/gcc/ada
Re: [Fortran, Patch] Cosmetics
Hi, is this patch committed now? I don't have the rights to do so myself. - Andre On Sun, 28 Dec 2014 17:17:50 +0100 FX wrote: > > > 2014-12-28 Andre Vehreschild > > > >* trans-decl.c (gfc_finish_var_decl): Fixed displaced comment. > >* trans-stmt.c (gfc_trans_allocate): Fixed indentation. > > OK to commit. Thanks! > > FX -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de
Re: [testsuite] PATCH: Check if -pg available
On Tue, Jan 13, 2015 at 04:54:32AM -0800, H.J. Lu wrote: > On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: > > On 01/12/15 14:51, Magnus Granberg wrote: > > >mĂĄndag 12 januari 2015 12.11.17 skrev H.J. Lu: > > >>On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law wrote: > > >>>On 01/12/15 12:59, H.J. Lu wrote: > > I don't know if -pg will work PIE on any targets. For Linux/x86 > > the choices of crt1.o are > > > > %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} > > > > -shared, -pg and -pie are mutually exclusive. Those crt1 files are > > only crt1 files provided by glibc. You can't even try -pg -pie on > > Linux without changing glibc. > > >>> > > >>>You're totally missing the point. What I care about is *why*. > > >>> > > >With -pg it use gcrt1.o object file and that file is not compile with > > >-fPIC. > > >When you build a shared lib on x86_64 all the objects files need to be > > >buiit > > >with -fPIC else you get a error like that one abow and it is the same > > >problems > > >when you build bin with -fPIE and linke with -pie. > > >Glibc do not provide one that is compile with -fPIC > > Is there some reason why glibc could not provide gcrt1.o compiled with > > -fPIC? > > > > > > Here is a patch to check if -pg is available. If -pg doesn't link, > profiling isn't available. OK for trunk? > > Thanks. > > > H.J. 
> --- > gcc/testsuite/lib/target-supports.exp | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index 0ac9646..7c09399 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -546,6 +546,12 @@ proc check_profiling_available { test_what } { > set profiling_available_saved 0 > } else { > set profiling_available_saved 1 > + if { [check_no_compiler_messages_nocache profiling executable { > + int main() { return 0; } } "-pg"] } { > + set profiling_available_saved 1 > + } else { > + set profiling_available_saved 0 > + } > } > } > Here is the ChangeLog entry. 2015-01-13 H.J. Lu * lib/target-supports.exp (check_profiling_available): Check if -pg links. H.J.
[PATCH] Fix PR64415
The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. Thoughts? Thanks, Richard. 2015-01-13 Richard Biener PR lto/64415 * common.opt (fvar-tracking-assignments): Remove 'Optimization' flag. (fvar-tracking-assignments-toggle): Likewise. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. Index: gcc/common.opt === --- gcc/common.opt (revision 219520) +++ gcc/common.opt (working copy) @@ -2397,13 +2397,13 @@ Perform variable tracking ; annotations. When flag_var_tracking_assignments == ; AUTODETECT_VALUE it will be set according to flag_var_tracking. fvar-tracking-assignments -Common Report Var(flag_var_tracking_assignments) Init(2) Optimization +Common Report Var(flag_var_tracking_assignments) Init(2) Perform variable tracking by annotating assignments ; Nonzero if we should toggle flag_var_tracking_assignments after ; processing options and computing its default. 
*/ fvar-tracking-assignments-toggle -Common Report Var(flag_var_tracking_assignments_toggle) Optimization +Common Report Var(flag_var_tracking_assignments_toggle) Toggle -fvar-tracking-assignments ; Positive if we should track uninitialized variables, negative if Index: gcc/testsuite/gcc.dg/lto/pr64415_0.c === --- gcc/testsuite/gcc.dg/lto/pr64415_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_0.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target fpic } */ +/* { dg-lto-options { { -O -flto -fpic } } } */ +/* { dg-extra-ld-options { -shared } } */ + +extern void bar(char *, int); + +extern char *baz; + +void foo() +{ + bar(baz, 0); +} Index: gcc/testsuite/gcc.dg/lto/pr64415_1.c === --- gcc/testsuite/gcc.dg/lto/pr64415_1.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_1.c(working copy) @@ -0,0 +1,17 @@ +/* { dg-options "-g" } */ + +extern int n; + +void bar(char *, int); + +inline void bar(char *s, int i) +{ + char *p = s; + +#ifdef V1 + if (i) +#else + if (n) +#endif +*s = 0; +}
[PATCH] Fix PR64373
The following patch guards LTO against PARM_DECLs without DECL_CONTEXT. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-01-13 Richard Biener PR lto/64373 * lto-streamer-out.c (tree_is_indexable): Guard for NULL DECL_CONTEXT. * gcc.dg/lto/pr64373_0.c: New testcase. Index: gcc/lto-streamer-out.c === --- gcc/lto-streamer-out.c (revision 219520) +++ gcc/lto-streamer-out.c (working copy) @@ -154,7 +154,8 @@ tree_is_indexable (tree t) /* Parameters and return values of functions of variably modified types must go to global stream, because they may be used in the type definition. */ - if (TREE_CODE (t) == PARM_DECL || TREE_CODE (t) == RESULT_DECL) + if ((TREE_CODE (t) == PARM_DECL || TREE_CODE (t) == RESULT_DECL) + && DECL_CONTEXT (t)) return variably_modified_type_p (TREE_TYPE (DECL_CONTEXT (t)), NULL_TREE); /* IMPORTED_DECL is put into BLOCK and thus it never can be shared. */ else if (TREE_CODE (t) == IMPORTED_DECL) Index: gcc/testsuite/gcc.dg/lto/pr64373_0.c === --- gcc/testsuite/gcc.dg/lto/pr64373_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64373_0.c(working copy) @@ -0,0 +1,10 @@ +/* { dg-lto-do assemble } */ +
+extern void b(int L, float (*data)[L]); +
+void a(void) +{ + float* p = 0; + int i = 0; + b(10, (float (*)[10])(p + i)); +}
[PATCH] Fix PR64406
When an optimization pass in the loop pipeline moves stmts between loops or removes loops we have to reset the SCEV cache to not have stale CHREC_LOOPs. This patch does it for loop distribution for which I have a testcase. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-01-13 Richard Biener PR tree-optimization/64406 * tree-loop-distribution.c (pass_loop_distribution::execute): Reset the SCEV hashtable if we distributed anything. * gcc.dg/pr64406.c: New testcase. Index: gcc/tree-loop-distribution.c === --- gcc/tree-loop-distribution.c(revision 219520) +++ gcc/tree-loop-distribution.c(working copy) @@ -1838,6 +1851,9 @@ out: if (changed) { + /* Cached scalar evolutions now may refer to wrong or non-existing +loops. */ + scev_reset_htab (); mark_virtual_operands_for_renaming (fun); rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); } Index: gcc/testsuite/gcc.dg/pr64406.c === --- gcc/testsuite/gcc.dg/pr64406.c (revision 0) +++ gcc/testsuite/gcc.dg/pr64406.c (working copy) @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftree-loop-distribute-patterns -fno-tree-loop-ivcanon -fno-tree-loop-vectorize -ftree-vectorize" } */ + +unsigned in[72]; + +void bar (unsigned out[], unsigned ia[]); + +void +foo () +{ + int i; + unsigned out[72], ia[8]; + for (i = 0; i < 8; i++) +{ + out[i * 8] = in[i * 8] + 5; + out[i * 8 + 1] = in[i * 8 + 1] + 6; + out[i * 8 + 2] = in[i * 8 + 2] + 7; + out[i * 8 + 3] = in[i * 8 + 3] + 8; + out[i * 8 + 4] = in[i * 8 + 4] + 9; + out[i * 8 + 5] = in[i * 8 + 5] + 10; + out[i * 8 + 6] = in[i * 8 + 6] + 11; + out[i * 8 + 7] = in[i * 8 + 7] + 12; + ia[i] = in[i]; +} + bar (out, ia); +}
Re: [PATCH] Fix PR64415
On Tue, Jan 13, 2015 at 02:04:26PM +0100, Richard Biener wrote: > The following removes -fvar-tracking-assignments from being eligible > to the optimization attribute/pragma which fixes LTO operation for > mixed inputs (LTO just drops debug stmts if the flag is false). > > In theory we could also fix inlining to do that when inlining > debug stmts into a non-VTA function but I think allowing this > kind of per-function IL flags is just silly. I actually think it makes sense to disable -fvar-tracking-assignments just for specific function, e.g. when it is known to be too expensive on some large function you don't care about debug info quality too much, while you still don't want to disable it on the whole TU level, because you have other functions (e.g. small ones) you still want to be able to debug often with good coverage. So if this is fixable in the inliner and/or LTO in-streamer that would be my preference. Jakub
[PATCH] Fix PRs 64493 and 64495
The following fixes a bug in outer loop reduction vectorization which happens to use a bogus vectorized stmt for the inner loop exit PHI. Bootstrap and regtest in progress on x86_64-unknown-linux-gnu. Richard. 2015-01-13 Richard Biener PR tree-optimization/64493 PR tree-optimization/64495 * tree-vect-loop.c (vect_finalize_reduction): For double-reductions assign the proper vectorized PHI to the inner loop exit PHIs. * gcc.dg/vect/pr64493.c: New testcase. * gcc.dg/vect/pr64495.c: Likewise. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c(revision 219520) +++ gcc/tree-vect-loop.c(working copy) @@ -4580,7 +4580,10 @@ vect_finalize_reduction: && !STMT_VINFO_LIVE_P (exit_phi_vinfo)) || double_reduc); - STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; + if (double_reduc) + STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; + else + STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; if (!double_reduc || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) != vect_double_reduction_def) Index: gcc/testsuite/gcc.dg/vect/pr64493.c === --- gcc/testsuite/gcc.dg/vect/pr64493.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64493.c (working copy) @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +int a, b, c, d, e, f, g, h; + +int +main () +{ + check_vect (); + + for (; a; a--) +for (d = 1; d <= 0; d++) + for (; d;) + if (h) + { + if (!g) __builtin_abort (); + if (!0) __builtin_abort (); + } + + for (f = 4; f; f--) +{ + for (b = 0; b < 2; b++) + c |= 1; + e |= c; +} + + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/pr64495.c === --- gcc/testsuite/gcc.dg/vect/pr64495.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64495.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include +#include "tree-vect.h" + +int a, b, c, d, e, f, g, i, j; +static int *h = &e; + +int +main () +{ + check_vect (); + + for (; a;) +for (; g; g++) + for (; f; f++) + if (j) + { + assert(b); + assert(0); + } + for (i = 24; i; i--) +{ + for (c 
= 0; c < 6; c++) + d |= 1; + *h |= d; +} + + if (e != 1) +__builtin_abort (); + + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */
Re: [PATCH 1/4] Core definition for APM XGene-1 and associated cost-table.
On 12 January 2015 at 20:15, Philipp Tomsich wrote: > +2014-11-19 Philipp Tomsich > + > + * config/aarch64/aarch64-cores.def (xgene1): Update/add the > + xgene1 (APM XGene-1) core definition. > + * gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1 > + * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1 > + * doc/invoke.texi: Document -mcpu=xgene1. > + Fix the date in the ChangeLog entry... otherwise OK commit it. Thanks /Marcus
[PATCH][ARM] PR 64149: Remove -mlra/-mno-lra option for ARM.
Hello, The LRA register allocator is enabled by default for the ARM backend and -mno-lra should no longer be used. This patch removes the -mlra/-mno-lra option from the ARM backend. arm-none-linux-gnueabihf passes gcc-check with no new failures. Matthew 2015-01-13 Matthew Wahab PR target/64149 * config/arm/arm.opt: Remove lra option and arm_lra_flag variable. * config/arm/arm.h (MODE_BASE_REG_CLASS): Remove use of arm_lra_flag, replace the conditional with its true branch. * config/arm/arm.c (TARGET_LRA_P): Set to hook_bool_void_true. (arm_lra_p): Remove. * testsuite/gcc.target/arm/thumb1-far-jump-3.c: Remove.diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8ca2dd8..e03e063 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -103,7 +103,6 @@ struct four_ints /* Forward function declarations. */ static bool arm_const_not_ok_for_debug_p (rtx); -static bool arm_lra_p (void); static bool arm_needs_doubleword_align (machine_mode, const_tree); static int arm_compute_static_chain_stack_bytes (void); static arm_stack_offsets *arm_get_frame_offsets (void); @@ -370,7 +369,7 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address #undef TARGET_LRA_P -#define TARGET_LRA_P arm_lra_p +#define TARGET_LRA_P hook_bool_void_true #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE arm_attribute_table @@ -5932,13 +5931,6 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, } } -/* Return true if we use LRA instead of reload pass. */ -static bool -arm_lra_p (void) -{ - return arm_lra_flag; -} - /* Return true if mode/type need doubleword alignment. 
*/ static bool arm_needs_doubleword_align (machine_mode mode, const_tree type) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 556e2da..ff38017 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1287,14 +1287,10 @@ enum reg_class /* For the Thumb the high registers cannot be used as base registers when addressing quantities in QI or HI mode; if we don't know the mode, then we must be conservative. */ -#define MODE_BASE_REG_CLASS(MODE) \ - (arm_lra_flag\ - ? (TARGET_32BIT ? CORE_REGS \ - : GET_MODE_SIZE (MODE) >= 4 ? BASE_REGS\ - : LO_REGS) \ - : ((TARGET_ARM || (TARGET_THUMB2 && !optimize_size)) ? CORE_REGS \ - : ((MODE) == SImode) ? BASE_REGS \ - : LO_REGS)) +#define MODE_BASE_REG_CLASS(MODE)\ + (TARGET_32BIT ? CORE_REGS \ + : GET_MODE_SIZE (MODE) >= 4 ? BASE_REGS \ + : LO_REGS) /* For Thumb we can not support SP+reg addressing, so we return LO_REGS instead of BASE_REGS. */ diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 5385e4a..6da49b8 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -143,10 +143,6 @@ mfpu= Target RejectNegative Joined Enum(arm_fpu) Var(arm_fpu_index) Specify the name of the target floating point hardware/format -mlra -Target Report Var(arm_lra_flag) Init(1) Save -Use LRA instead of reload (transitional) - mhard-float Target RejectNegative Alias(mfloat-abi=, hard) Undocumented diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c deleted file mode 100644 index 90559ba..000 --- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c +++ /dev/null @@ -1,108 +0,0 @@ -/* Catch reload ICE on target thumb1 with far jump optimization. - * It is also a valid case for non-thumb1 target. */ - -/* Add -mno-lra option as it is only reproducable with reload. It will - be removed after reload is completely removed. 
*/ -/* { dg-options "-mno-lra -fomit-frame-pointer" } */ -/* { dg-do compile } */ - -#define C 2 -#define A 4 -#define RGB (C | A) -#define GRAY (A) - -typedef unsigned long uint_32; -typedef unsigned char byte; -typedef byte* bytep; - -typedef struct ss -{ - uint_32 w; - uint_32 r; - byte c; - byte b; - byte p; -} info; - -typedef info * infop; - -void -foo(infop info, bytep row) -{ - uint_32 iw = info->w; - if (info->c == RGB) - { - if (info->b == 8) - { - bytep sp = row + info->r; - bytep dp = sp; - byte save; - uint_32 i; - - for (i = 0; i < iw; i++) - { -save = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = save; - } - } - - else - { - bytep sp = row + info->r; - bytep dp = sp; - byte save[2]; - uint_32 i; - - for (i = 0; i < iw; i++) - { -save[0] = *(--sp); -save[1] = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp)
Re: [PATCH] Fix PR64415
On Tue, 13 Jan 2015, Jakub Jelinek wrote: > On Tue, Jan 13, 2015 at 02:04:26PM +0100, Richard Biener wrote: > > The following removes -fvar-tracking-assignments from being eligible > > to the optimization attribute/pragma which fixes LTO operation for > > mixed inputs (LTO just drops debug stmts if the flag is false). > > > > In theory we could also fix inlining to do that when inlining > > debug stmts into a non-VTA function but I think allowing this > > kind of per-function IL flags is just silly. > > I actually think it makes sense to disable -fvar-tracking-assignments > just for specific function, e.g. when it is known to be too expensive > on some large function you don't care about debug info quality too much, > while you still don't want to disable it on the whole TU level, because > you have other functions (e.g. small ones) you still want to be able to > debug often with good coverage. > > So if this is fixable in the inliner and/or LTO in-streamer that would be > my preference. The following seems to work (for the testcase). Testing coverage of this mode will of course be bad. Richard. 2015-01-13 Richard Biener PR lto/64415 * tree-inline.c (insert_debug_decl_map): Check destination function MAY_HAVE_DEBUG_STMTS. (insert_init_debug_bind): Likewise. (insert_init_stmt): Remove redundant check. (remap_gimple_stmt): Drop debug stmts if the destination function has var-tracking assignments disabled. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. 
Index: gcc/testsuite/gcc.dg/lto/pr64415_0.c === --- gcc/testsuite/gcc.dg/lto/pr64415_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_0.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target fpic } */ +/* { dg-lto-options { { -O -flto -fpic } } } */ +/* { dg-extra-ld-options { -shared } } */ + +extern void bar(char *, int); + +extern char *baz; + +void foo() +{ + bar(baz, 0); +} Index: gcc/testsuite/gcc.dg/lto/pr64415_1.c === --- gcc/testsuite/gcc.dg/lto/pr64415_1.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_1.c(working copy) @@ -0,0 +1,17 @@ +/* { dg-options "-g" } */ + +extern int n; + +void bar(char *, int); + +inline void bar(char *s, int i) +{ + char *p = s; + +#ifdef V1 + if (i) +#else + if (n) +#endif +*s = 0; +} Index: gcc/tree-inline.c === --- gcc/tree-inline.c (revision 219520) +++ gcc/tree-inline.c (working copy) @@ -192,7 +192,7 @@ insert_debug_decl_map (copy_body_data *i if (!gimple_in_ssa_p (id->src_cfun)) return; - if (!MAY_HAVE_DEBUG_STMTS) + if (!opt_for_fn (id->dst_fn, flag_var_tracking_assignments)) return; if (!target_for_debug_bind (key)) @@ -1348,6 +1348,10 @@ remap_gimple_stmt (gimple stmt, copy_bod bool skip_first = false; gimple_seq stmts = NULL; + if (is_gimple_debug (stmt) + && !opt_for_fn (id->dst_fn, flag_var_tracking_assignments)) +return stmts; + /* Begin by recognizing trees that we'll completely rewrite for the inlining context. Our output for these trees is completely different from out input (e.g. 
RETURN_EXPR is deleted, and morphs @@ -3007,7 +3011,7 @@ insert_init_debug_bind (copy_body_data * if (!gimple_in_ssa_p (id->src_cfun)) return NULL; - if (!MAY_HAVE_DEBUG_STMTS) + if (!opt_for_fn (id->dst_fn, flag_var_tracking_assignments)) return NULL; tracked_var = target_for_debug_bind (var); @@ -3063,7 +3067,7 @@ insert_init_stmt (copy_body_data *id, ba gsi_insert_after (&si, init_stmt, GSI_NEW_STMT); gimple_regimplify_operands (init_stmt, &si); - if (!is_gimple_debug (init_stmt) && MAY_HAVE_DEBUG_STMTS) + if (!is_gimple_debug (init_stmt)) { tree def = gimple_assign_lhs (init_stmt); insert_init_debug_bind (id, bb, def, def, init_stmt);
Re: [PATCH] Fix PR64415
On Tue, Jan 13, 2015 at 02:26:39PM +0100, Richard Biener wrote: > The following seems to work (for the testcase). Testing coverage > of this mode will of course be bad. LGTM. > 2015-01-13 Richard Biener > > PR lto/64415 > * tree-inline.c (insert_debug_decl_map): Check destination > function MAY_HAVE_DEBUG_STMTS. > (insert_init_debug_bind): Likewise. > (insert_init_stmt): Remove redundant check. > (remap_gimple_stmt): Drop debug stmts if the destination > function has var-tracking assignments disabled. > > * gcc.dg/lto/pr64415_0.c: New testcase. > * gcc.dg/lto/pr64415_1.c: Likewise. Jakub
Re: [PATCH 2/4] Pipeline model for APM XGene-1.
On 12 January 2015 at 20:15, Philipp Tomsich wrote: > --- > gcc/config/aarch64/aarch64.md | 1 + > gcc/config/arm/xgene1.md | 531 > ++ > 2 files changed, 532 insertions(+) > create mode 100644 gcc/config/arm/xgene1.md > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 12e1054..1f6b1b6 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -190,6 +190,7 @@ > (include "../arm/cortex-a53.md") > (include "../arm/cortex-a15.md") > (include "thunderx.md") > +(include "../arm/xgene1.md Can we have a ChangeLog entry please. /Marcus
Re: [PATCH 2/4] Pipeline model for APM XGene-1.
On 13/01/15 13:46, Marcus Shawcroft wrote: > On 12 January 2015 at 20:15, Philipp Tomsich > wrote: >> --- >> gcc/config/aarch64/aarch64.md | 1 + >> gcc/config/arm/xgene1.md | 531 >> ++ >> 2 files changed, 532 insertions(+) >> create mode 100644 gcc/config/arm/xgene1.md >> >> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md >> index 12e1054..1f6b1b6 100644 >> --- a/gcc/config/aarch64/aarch64.md >> +++ b/gcc/config/aarch64/aarch64.md >> @@ -190,6 +190,7 @@ >> (include "../arm/cortex-a53.md") >> (include "../arm/cortex-a15.md") >> (include "thunderx.md") >> +(include "../arm/xgene1.md > > Can we have a ChangeLog entry please. > /Marcus > And please update the copyright years in xgene1.md. R.
Re: [PATCH, aarch64] Add prefetch support
On 11 January 2015 at 02:37, Andrew Pinski wrote: > On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft > wrote: >> On 30 October 2014 08:54, Gopalasubramanian, Ganesh >> wrote: >> >>> 2014-10-30 Ganesh Gopalasubramanian >> >> Check the whitespace in your ChangeLog line. >> >>> * config/arm/types.md (define_attr "type"): Add prefetch. >> >> The existing schedulers use 'load1'. We can of course split that into >> two introducing "prefetch" and update all of the existing schedulers >> to reflect the change. However I suggest we do that as a separate >> activity when someone actually needs the distinction, note this change >> will require updating the schedulers for both ARM and AArch64 backends >> not just those relevant to AArch64. For this prefetch patch I suggest >> we go with the existing "load1". > > I will need this change for ThunderX schedule. The Pref instruction > is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1->load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the "right" behaviour when he is ready. Andrew OK ? Cheers /Marcus
Re: [PATCH, aarch64] Add prefetch support
On Tue, Jan 13, 2015 at 6:13 AM, Marcus Shawcroft wrote: > On 11 January 2015 at 02:37, Andrew Pinski wrote: >> On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft >> wrote: >>> On 30 October 2014 08:54, Gopalasubramanian, Ganesh >>> wrote: >>> 2014-10-30 Ganesh Gopalasubramanian >>> >>> Check the whitespace in your ChangeLog line. >>> * config/arm/types.md (define_attr "type"): Add prefetch. >>> >>> The existing schedulers use 'load1'. We can of course split that into >>> two introducing "prefetch" and update all of the existing schedulers >>> to reflect the change. However I suggest we do that as a separate >>> activity when someone actually needs the distinction, note this change >>> will require updating the schedulers for both ARM and AArch64 backends >>> not just those relevant to AArch64. For this prefetch patch I suggest >>> we go with the existing "load1". >> >> I will need this change for ThunderX schedule. The Pref instruction >> is single issued while load1 can be dual issued. > > Hi > > https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html > > Philipp when you deal with Ramana's request above to split > load1->load1/prefetch in the existing schedulers I suggest you also > split it in aarch64/thunderx.md in order to retain existing behaviour. > Andrew can then follow up add the "right" behaviour when he is ready. > Andrew OK ? Yes that sounds ok to me. I was going to submit an update to thunderx.md file this week anyways. Thanks, Andrew > > Cheers > /Marcus
Re: [testsuite] PATCH: Add check_effective_target_pie
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: > On 01/12/15 14:51, Magnus Granberg wrote: > >mĂĄndag 12 januari 2015 12.11.17 skrev H.J. Lu: > >>On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law wrote: > >>>On 01/12/15 12:59, H.J. Lu wrote: > I don't know if -pg will work PIE on any targets. For Linux/x86 > the choices of crt1.o are > > %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} > > -shared, -pg and -pie are mutually exclusive. Those crt1 files are > only crt1 files provided by glibc. You can't even try -pg -pie on > Linux without changing glibc. > >>> > >>>You're totally missing the point. What I care about is *why*. > >>> > >With -pg it use gcrt1.o object file and that file is not compile with -fPIC. > >When you build a shared lib on x86_64 all the objects files need to be buiit > >with -fPIC else you get a error like that one abow and it is the same > >problems > >when you build bin with -fPIE and linke with -pie. > >Glibc do not provide one that is compile with -fPIC > Is there some reason why glibc could not provide gcrt1.o compiled with > -fPIC? > I opened a glibc bug: https://sourceware.org/bugzilla/show_bug.cgi?id=17836 and submitted a patch: https://sourceware.org/ml/libc-alpha/2015-01/msg00284.html H.J.
Re: [PATCH, aarch64] Add prefetch support
Great. I should have an update patch-set ready & tested later tonight. Best, Phil. > On 13 Jan 2015, at 15:18, Andrew Pinski wrote: > > On Tue, Jan 13, 2015 at 6:13 AM, Marcus Shawcroft > wrote: >> On 11 January 2015 at 02:37, Andrew Pinski wrote: >>> On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft >>> wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh wrote: > 2014-10-30 Ganesh Gopalasubramanian Check the whitespace in your ChangeLog line. >* config/arm/types.md (define_attr "type"): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing "prefetch" and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing "load1". >>> >>> I will need this change for ThunderX schedule. The Pref instruction >>> is single issued while load1 can be dual issued. >> >> Hi >> >> https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html >> >> Philipp when you deal with Ramana's request above to split >> load1->load1/prefetch in the existing schedulers I suggest you also >> split it in aarch64/thunderx.md in order to retain existing behaviour. >> Andrew can then follow up add the "right" behaviour when he is ready. >> Andrew OK ? > > Yes that sounds ok to me. I was going to submit an update to > thunderx.md file this week anyways. > > Thanks, > Andrew > > >> >> Cheers >> /Marcus
[PATCH] PR59448 - Promote consume to acquire
Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Andrew * builtins.c (memmodel_consume_fix) : New. Promote consume to acquire. (expand_builtin_atomic_exchange, expand_builtin_atomic_compare_exchange, expand_builtin_atomic_load, expand_builtin_atomic_fetch_op, expand_builtin_atomic_clear, expand_builtin_atomic_test_and_set, expand_builtin_atomic_thread_fence, expand_builtin_atomic_signal_fence): Call memmodel_consume_fix. Index: builtins.c === *** builtins.c (revision 219462) --- builtins.c (working copy) *** get_memmodel (tree exp) *** 5368,5373 --- 5368,5382 return (enum memmodel) val; } + /* Workaround for Bugzilla 59448. GCC doesn't track consume properly, so +be conservative and promote consume to acquire. 
*/ + static void + memmodel_consume_fix (enum memmodel &val) + { + if (val == MEMMODEL_CONSUME) + val = MEMMODEL_ACQUIRE; + } + /* Expand the __atomic_exchange intrinsic: TYPE __atomic_exchange (TYPE *object, TYPE desired, enum memmodel) EXP is the CALL_EXPR. *** expand_builtin_atomic_exchange (machine_ *** 5389,5394 --- 5398,5405 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (model); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); *** expand_builtin_atomic_compare_exchange ( *** 5434,5439 --- 5445,5453 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (success); + memmodel_consume_fix (failure); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); *** expand_builtin_atomic_load (machine_mode *** 5493,5498 --- 5507,5514 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (model); + /* Expand the operand. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); *** expand_builtin_atomic_fetch_op (machine_ *** 5553,5558 --- 5569,5576 model = get_memmodel (CALL_EXPR_ARG (exp, 2)); + memmodel_consume_fix (model); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); *** expand_builtin_atomic_clear (tree exp) *** 5627,5632 --- 5645,5652 return const0_rtx; } + memmodel_consume_fix (model); + if (HAVE_atomic_clear) { emit_insn (gen_atomic_clear (mem, model)); *** expand_builtin_atomic_test_and_set (tree *** 5658,5664 mode = mode_for_size (BOOL_TYPE_SIZE, MODE_INT, 0); mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! 
return expand_atomic_test_and_set (target, mem, model); } --- 5678,5684 mode = mode_for_size (BOOL_TYPE_SIZE, MODE_INT, 0); mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! memmodel_consume_fix (model); return expand_atomic_test_and_set (target, mem, model); } *** static void *** 5797,5802 --- 5817,5823 expand_builtin_atomic_thread_fence (tree exp) { enum memmodel model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + memmodel_consume_fix (model); expand_mem_thread_fence (model); } *** static void *** 5808,5813 --- 5829,5835 expand_builtin_atomic_signal_fence (tree exp) { enum memmodel model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + memmodel_consume_fix (model); expand_mem_sig
[patch] libstdc++/64571 export fstream functions using new std::string
The existing tests for these functions are compile-only so didn't catch that I forgot to export these new symbols. I'll add a better test next week. Tested x86_64-linux, committed to trunk. commit d428e75af04d995451a917ef7c9caed6b8cee737 Author: Jonathan Wakely Date: Tue Jan 13 14:27:34 2015 + PR libstdc++/64571 * config/abi/pre/gnu.ver: Export fstream functions using new string. diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 7bb65e9..700da18 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -1648,6 +1648,13 @@ GLIBCXX_3.4.21 { _ZStlsI[cw]St11char_traitsI[cw]ESaI[cw]EERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_string*; _ZStrsI[cw]St11char_traitsI[cw]ESaI[cw]EERSt13basic_istreamIT_T0_ES7_RNSt7__cxx1112basic_string*; +# fstream functions taking ABI-tagged std::string +_ZNSt13basic_filebufI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; +_ZNSt13basic_fstreamI[cw]St11char_traitsI[cw]EEC1ERKNSt7__cxx1112basic_string*; +_ZNSt13basic_fstreamI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; +_ZNSt14basic_[io]fstreamI[cw]St11char_traitsI[cw]EEC1ERKNSt7__cxx1112basic_string*; +_ZNSt14basic_[io]fstreamI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; + # std::locale::name() returning new std::string _ZNKSt6locale4nameB5cxx11Ev;
Re: [PATCH] PR59448 - Promote consume to acquire
On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod wrote: > Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 > > Basically we can generate incorrect code for an atomic consume operation in > some circumstances. The general feeling seems to be that we should simply > promote all consume operations to an acquire operation until there is a > better definition/understanding of the consume model and how GCC can track > it. > > I proposed a simple patch in the PR, and I have not seen or heard of any > dissenting opinion. We should get this in before the end of stage 3 I > think. > > The problem with the patch in the PR is the memory model is immediately > promoted from consume to acquire. This happens *before* any of the > memmodel checks are made. If a consume is illegally specified (such as in a > compare_exchange), it gets promoted to acquire and the compiler doesn't > report the error because it never sees the consume. > > This new patch simply makes the adjustment after any errors are checked on > the originally specified model. It bootstraps on x86_64-unknown-linux-gnu > and passes all regression testing. > I also built an aarch64 compiler and it appears to issue the LDAR as > specified in the PR, but anyone with a vested interest really ought to check > it out with a real build to be sure. > > OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. > > Andrew
Re: [PATCH] rs6000: Make rs6000_split_logical handle inverted 2nd operand (PR64358)
On Mon, Jan 12, 2015 at 6:52 PM, Pat Haugen wrote: > Following backport tested on 4.8/4.9 with no new regressions. Ok to commit > to those branches? > > -Pat > > > 2015-01-12 Pat Haugen > > Backport from mainline > 2014-12-20 Segher Boessenkool > > > PR target/64358 > * config/rs6000/rs6000.c (rs6000_split_logical_inner): Swap the > input operands if only the second is inverted. > * config/rs6000/rs6000.md (*boolc3_internal1 for BOOL_128): > Swap BOOL_REGS_OP1 and BOOL_REGS_OP2. Correct arguments to > rs6000_split_logical. > (*boolc3_internal2 for TI2): Swap operands[1] and operands[2]. Okay. Thanks, David
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) Andrew
[[ARM/AArch64][testsuite] 01/36] Add explicit dependency on Neon Cumulative Saturation flag (QC).
__set_neon_cumulative_sat() modifies the contents of the QC flag, and some intrinsics do so too: this patch adds the explicit dependency on the asm statement, to avoid code reordering or removal. When writing QC, the asm statement now has a fake input dependency, which is the output of the intrinsic being tested. Modifying the __set_neon_cumulative_sat macro is necessary, to be able to accept all the possible input types. Update the generic code in unary_sat_op.inc and binary_sat_op.inc accordingly. * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (Set_Neon_Cumulative_Sat): Add parameter. (__set_neon_cumulative_sat): Support new parameter. * gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc (TEST_BINARY_SAT_OP1): Call Set_Neon_Cumulative_Sat with new argument. * gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc (TEST_UNARY_SAT_OP1): Call Set_Neon_Cumulative_Sat with new argument. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 8ea1f26..6464c66 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -255,7 +255,11 @@ typedef union { #endif /* __ORDER_BIG_ENDIAN__ */ #define Neon_Cumulative_Sat __read_neon_cumulative_sat() -#define Set_Neon_Cumulative_Sat(x) __set_neon_cumulative_sat((x)) +/* We need a fake dependency to ensure correct ordering of asm + statements to preset the QC flag value, and Neon operators writing + to QC. 
*/ +#define Set_Neon_Cumulative_Sat(x, depend) \ + __set_neon_cumulative_sat((x), (depend)) #if defined(__aarch64__) static volatile int __read_neon_cumulative_sat (void) { @@ -263,13 +267,12 @@ static volatile int __read_neon_cumulative_sat (void) { asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); return _afpscr_for_qc.b.QC; } -static void __set_neon_cumulative_sat (int x) { -_ARM_FPSCR _afpscr_for_qc; -asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); -_afpscr_for_qc.b.QC = x; -asm volatile ("msr fpsr,%0" : : "r" (_afpscr_for_qc)); -return; -} +#define __set_neon_cumulative_sat(x, depend) { \ +_ARM_FPSCR _afpscr_for_qc; \ +asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \ +_afpscr_for_qc.b.QC = x; \ +asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ + } #else static volatile int __read_neon_cumulative_sat (void) { _ARM_FPSCR _afpscr_for_qc; @@ -277,13 +280,12 @@ static volatile int __read_neon_cumulative_sat (void) { return _afpscr_for_qc.b.QC; } -static void __set_neon_cumulative_sat (int x) { -_ARM_FPSCR _afpscr_for_qc; -asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); -_afpscr_for_qc.b.QC = x; -asm volatile ("vmsr fpscr,%0" : : "r" (_afpscr_for_qc)); -return; -} +#define __set_neon_cumulative_sat(x, depend) { \ +_ARM_FPSCR _afpscr_for_qc; \ +asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));\ +_afpscr_for_qc.b.QC = x; \ +asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ + } #endif /* Declare expected cumulative saturation results, one for each diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc index 35d7701..c09a468 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc @@ -18,7 +18,7 @@ void FNNAME (INSN_NAME) (void) /* vector_res = OP(vector1,vector2), then store the result. 
*/ #define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc index 3f6d984..0da1426 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc @@ -17,7 +17,7 @@ void FNNAME (INSN_NAME) (void) { /* y=OP(x), then store the result. */ #define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ - Set_Neon_Cumulativ
[[ARM/AArch64][testsuite] 03/36] Add vmax, vmin, vhadd, vhsub and vrhadd tests.
* gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vhadd.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vhsub.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmax.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmin.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vrhadd.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc new file mode 100644 index 000..36efe3a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc @@ -0,0 +1,120 @@ +/* Can't use the standard binary_op.inc template because vmax has no + 64 bits variant. */ + +#include + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* Basic test: y=vmax(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)\ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#ifndef NO_FLOAT_VARIANT + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); +#endif + + /* Choose init value arbitrarily, will be used as comparison value. 
*/ + VDUP(vector2, , int, s, 8, 8, -13); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -16); + VDUP(vector2, , uint, u, 8, 8, 0xf3); + VDUP(vector2, , uint, u, 16, 4, 0xfff1); + VDUP(vector2, , uint, u, 32, 2, 0xfff0); + VDUP(vector2, q, int, s, 8, 16, -12); + VDUP(vector2, q, int, s, 16, 8, -13); + VDUP(vector2, q, int, s, 32, 4, -15); + VDUP(vector2, q, uint, u, 8, 16, 0xf9); + VDUP(vector2, q, uint, u, 16, 8, 0xfff2); + VDUP(vector2, q, uint, u, 32, 4, 0xfff1); +#ifndef NO_FLOAT_VARIANT + VDUP(vector2, , float, f, 32, 2, -15.5f); + VDUP(vector2, q, float, f, 32, 4, -14.5f); +#endif + +#ifndef NO_FLOAT_VARIANT +#define FLOAT_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 32, 2); \ + MACRO(VAR, q, float, f, 32, 4) +#else +#define FLOAT_VARIANT(MACRO, VAR) +#endif + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, , int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4);\ + MACRO(VAR, , uint, u, 32, 2);\ + MACRO(VAR, q, int, s, 8, 16);\ + MACRO(VAR, q, int, s, 16, 8);\ + MACRO(VAR, q, int, s, 32, 4);\ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + FLOAT_VARIANT(MACRO, VAR) + + /* Apply a binary operator named INSN_NAME. 
*/ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME); + + CHECK_RESULTS (TEST_MSG, ""); + +#ifndef NO_FLOAT_VARIANT + /* Extra FP tests with special values (NaN, ) */ + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, NAN); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)"); + + VDUP(vector, q, float, f, 32, 4, -NAN); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)"); + + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)"); + + VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)"); + + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, q, float, f, 32, 4, -0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)
[[ARM/AArch64][testsuite] 04/36] Add vld1_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c new file mode 100644 index 000..168cf5e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c @@ -0,0 +1,129 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xf0, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x, 0x, 0x, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x, 0xfff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x, 0x, 0x, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x, 0xfff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x, 0x, 0x, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x, 0xc180 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x, 0x, 0x, 0x, + 0x, 0xfff0, 0x, 0x }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x, 0x, + 0xfff0, 0x }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x, + 0xfff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, +0xaa, 0xaa, 0xaa, 0xaa, +0xaa, 0xaa, 0xaa, 0xaa, +0xf0, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0xfff0, 0x }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x, 0x, +0xfff0, 0x }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff0, +0x }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, +0xaa, 0xaa, 0xaa, 0xaa, +0xaa, 0xaa, 0xaa, 0xaa, 
+0xf0, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0xfff0, 0x }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, + 0xc180, 0x }; + +#define TEST_MSG "VLD1_LANE/VLD1_LANEQ" +void exec_vld1_lane (void) +{ + /* Fill vector_src with 0xAA, then load 1 lane. */ +#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, W/8*N); \ + VECT_VAR(vector_src, T1, W, N) = \ +vld1##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + VECT_VAR(vector, T1, W, N) = \ +vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_VAR(vector_src, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_src); + + ARRAY(buffer_src, int, 8, 8); + ARRAY(buffer_src, int, 16, 4); + ARRAY(buffer_src, int, 32, 2); + ARRAY(buffer_src, int, 64, 1); + ARRAY(buffer_src, uint, 8, 8); + ARRAY(buffer_src, uint, 16, 4); + ARRAY(buffer_src, uint, 32, 2); + ARRAY(buffer_src, uint, 64, 1); + ARRAY(buffer_src, poly, 8, 8); + ARRAY(buffer_src, poly, 16, 4); + ARRAY(buffer_src, float, 32, 2); + + ARRAY(buffer_src, int, 8, 16); + ARRAY(buffer_src, int, 16, 8); + ARRAY(buffer_src, int, 32, 4); + ARRAY(buffer_src, int, 64, 2); + ARRAY(buffer_src, uint, 8, 16); + ARRAY(buffer_src, uint, 16, 8); + ARRAY(buffer_src, uint, 32, 4); + ARRAY(buffer_src, uint, 64, 2); + ARRAY(buffer_src, poly, 8, 16); + ARRAY(buffer_src, poly, 16, 8); + ARRAY(buffer_src, float, 32, 4); + + clean_results (); + + /* Choose lane arbitrarily. */ + TEST_VLD1_LANE(, int, s, 8, 8, 6); + TEST_VLD1_LANE(, int, s, 16,
[[ARM/AArch64][testsuite] 00/36] More Neon intrinsics tests.
This patch series is a follow-up of the conversion of my existing testsuite into DejaGnu. It does not yet cover all the tests I wrote, but I chose to post this set to have a chance to have it accepted before stage 4. I will have 35 more files to convert after this set. Most of the patches only add new tests/files, but a few modify existing files as follows: Patch 1: Fix dependency for Neon cumulative saturation (QC) flag. Patch 2: Add traces in the CHECK* macro, to assert that a test was actually executed. Patch 8: Refactor vuzp and vzip tests. Patch 9: Split vaddhn.c into vXXXhn.inc and vaddhn.c to share code with other new tests. Patch 19: Put most of the code in common with vaddl in vXXXl.inc. Patch 20: Put most of the code in common with vaddw through vXXXw.inc Christophe Lyon (36): Add explicit dependency on Neon Cumulative Saturation flag (QC). Be more verbose, and actually confirm that a test was checked. Add vmax, vmin, vhadd, vhsub and vrhadd tests. Add vld1_lane tests. Add vldX_dup test. Add vmla and vmls tests. Add vmla_lane and vmls_lane tests. Add vtrn tests. Refactor vuzp and vzip tests. Add vsubhn, vraddhn and vrsubhn tests. Split vaddhn.c into vXXXhn.inc and vaddhn.c to share code with other new tests. Add vmlal and vmlsl tests. Add vmlal_lane and vmlsl_lane tests. Add vmlal_n and vmlsl_n tests. Add vmla_n and vmls_n tests. Add vqdmlal and vqdmlsl tests. Add vqdmlal_lane and vqdmlsl_lane tests. Add vqdmlal_n and vqdmlsl_n tests. Add vpadd, vpmax and vpmin tests. Add vsli_n and vsri_n tests. Add vsubl tests, put most of the code in common with vaddl in vXXXl.inc. Add vsubw tests, putting most of the code in common with vaddw through vXXXw.inc Add vmovl tests. Add vmovn tests. Add vmul_lane tests. Add vmul_n tests. Add vmull tests. Add vmull_lane tests. Add vmull_n tests. Add vmvn tests. Add vpadal tests. Add vpaddl tests. Add vqdmulh tests. Add vqdmulh_lane tests. Add vqdmulh_n tests. Add vqdmull tests. Add vqdmull_lane tests. Add vqdmull_n tests. 
.../aarch64/advsimd-intrinsics/arm-neon-ref.h | 35 +- .../aarch64/advsimd-intrinsics/binary_op_no64.inc | 120 .../aarch64/advsimd-intrinsics/binary_sat_op.inc | 2 +- .../aarch64/advsimd-intrinsics/unary_sat_op.inc| 2 +- .../aarch64/advsimd-intrinsics/vXXXhn.inc | 50 ++ .../aarch64/advsimd-intrinsics/vXXXl.inc | 70 +++ .../aarch64/advsimd-intrinsics/vXXXw.inc | 70 +++ .../gcc.target/aarch64/advsimd-intrinsics/vaddhn.c | 57 +- .../gcc.target/aarch64/advsimd-intrinsics/vaddl.c | 77 +-- .../gcc.target/aarch64/advsimd-intrinsics/vaddw.c | 77 +-- .../gcc.target/aarch64/advsimd-intrinsics/vhadd.c | 54 ++ .../gcc.target/aarch64/advsimd-intrinsics/vhsub.c | 52 ++ .../aarch64/advsimd-intrinsics/vld1_lane.c | 129 .../aarch64/advsimd-intrinsics/vldX_dup.c | 671 + .../gcc.target/aarch64/advsimd-intrinsics/vmax.c | 64 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmin.c | 66 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmlX.inc | 110 .../aarch64/advsimd-intrinsics/vmlX_lane.inc | 91 +++ .../aarch64/advsimd-intrinsics/vmlX_n.inc | 78 +++ .../aarch64/advsimd-intrinsics/vmlXl.inc | 89 +++ .../aarch64/advsimd-intrinsics/vmlXl_lane.inc | 70 +++ .../aarch64/advsimd-intrinsics/vmlXl_n.inc | 61 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmla.c | 50 ++ .../aarch64/advsimd-intrinsics/vmla_lane.c | 50 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmla_n.c | 50 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmlal.c | 18 + .../aarch64/advsimd-intrinsics/vmlal_lane.c| 14 + .../aarch64/advsimd-intrinsics/vmlal_n.c | 14 + .../gcc.target/aarch64/advsimd-intrinsics/vmls.c | 52 ++ .../aarch64/advsimd-intrinsics/vmls_lane.c | 52 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmls_n.c | 52 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmlsl.c | 22 + .../aarch64/advsimd-intrinsics/vmlsl_lane.c| 18 + .../aarch64/advsimd-intrinsics/vmlsl_n.c | 18 + .../gcc.target/aarch64/advsimd-intrinsics/vmovl.c | 77 +++ .../gcc.target/aarch64/advsimd-intrinsics/vmovn.c | 50 ++ .../aarch64/advsimd-intrinsics/vmul_lane.c | 
104 .../gcc.target/aarch64/advsimd-intrinsics/vmul_n.c | 96 +++ .../gcc.target/aarch64/advsimd-intrinsics/vmull.c | 75 +++ .../aarch64/advsimd-intrinsics/vmull_lane.c| 66 ++ .../aarch64/advsimd-intrinsics/vmull_n.c | 61 ++ .../gcc.target/aarch64/advsimd-intrinsics/vmvn.c | 136 + .../aarch64/advsimd-intrinsics/vpXXX.inc | 67 ++ .../gcc.target/aarch64/advsimd-intrinsics/vpadal.c | 155 + .../gcc.target/aarch64/advsimd-intrinsics/vpadd.c | 19 + .../gcc.target/aarch64/advsimd-intrinsics/vpaddl.c | 129 .../gcc.target/aarch64/advsimd-intrinsics/vpmax.c |
[[ARM/AArch64][testsuite] 05/36] Add vldX_dup test.
* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c new file mode 100644 index 000..53cd8f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c @@ -0,0 +1,671 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ + +/* vld2_dup/chunk 0. */ +VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0xfff0 }; +VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfff0 }; +VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc180, 0xc170 }; +VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0x, 0x, 0x, 0x, + 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0x, 0x, + 0x, 0x }; +VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x, + 0x }; +VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0x, 0x, +0x, 0x }; 
+VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x, +0x }; +VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0x, 0x, + 0x, 0x }; + +/* vld2_dup/chunk 1. */ +VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1, + 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc180, 0xc170 }; +VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0x, 0x, 0x, 0x, +
[[ARM/AArch64][testsuite] 11/36] Add vmlal_lane and vmlsl_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc new file mode 100644 index 000..ca45134 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc @@ -0,0 +1,70 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vmlxl_lane(vector, vector3, vector4, lane), + then store the result. */ +#define TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V)\ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector3, T1, W2, N), \ +VECT_VAR(vector4, T1, W2, N), \ +V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + 
VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0x55); + VDUP(vector4, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0x55); + VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 1); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 64, 32, 2, 1); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c new file mode 100644 index 000..0a384a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c @@ -0,0 +1,14 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmlal_lane +#define TEST_MSG "VMLAL_LANE" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; + +#include "vmlXl_lane.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c new file mode 100644 index 000..8b944a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c @@ -0,0 +1,18 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmlsl_lane +#define TEST_MSG "VMLSL_LANE" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc1d9, 0xc1da, + 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc1d9, + 0xc1da }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc1d9, 0xc1da, +0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xc1d9, +0xc1da }; + +#include "vmlXl_lane.inc" -- 2.1.0
[[ARM/AArch64][testsuite] 10/36] Add vmlal and vmlsl tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc new file mode 100644 index 000..1e6bab3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc @@ -0,0 +1,89 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMLXL1(INSN, T1, T2, W, W2, N)\ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector3, T1, W2, N), \ +VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VMLXL1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector3, int, 8, 8); + DECL_VARIABLE(vector4, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector3, uint, 8, 8); + DECL_VARIABLE(vector4, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 8); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, 
int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector3, , int, s, 8, 8, 0x55); + VDUP(vector4, , int, s, 8, 8, 0xBB); + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 8, 8, 0x55); + VDUP(vector4, , uint, u, 8, 8, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0x55); + VDUP(vector4, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0x55); + VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8); + TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2); + TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8); + TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4); + TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2); + + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c new file mode 100644 index 000..c147f31 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c @@ -0,0 +1,18 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmlal +#define TEST_MSG "VMLAL" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe907, 0xe908, 0xe909, 0xe90a, + 0xe90b, 0xe90c, 0xe90d, 0xe90e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, +0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; + +#include "vmlXl.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c new file mode 100644 index 000..6c984ae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c @@ -0,0 +1,22 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h"
[[ARM/AArch64][testsuite] 09/36] Add vsubhn, vraddhn and vrsubhn tests. Split vaddhn.c into vXXXhn.inc and vaddhn.c to share code with other new tests.
* gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vraddhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vrsubhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vsubhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Use code from vXXXhn.inc. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc new file mode 100644 index 000..0dbcc92 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc @@ -0,0 +1,50 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. */ +#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ +VECT_VAR(vector2, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + +#define TEST_VADDHN(INSN, T1, T2, W, W2, N)\ + TEST_VADDHN1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector1); + DECL_VARIABLE_128BITS_VARIANTS(vector2); + + clean_results (); + + /* Fill input vector1 and vector2 with arbitrary values */ + VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); + VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); + VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); + VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); + VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); + VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); + + VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); + VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX); + VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); + VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); + VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); + VDUP(vector2, q, uint, u, 64, 2, 
(uint64_t)UINT32_MAX); + + TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); + TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4); + TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2); + TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8); + TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4); + TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c index 58fd5ea..88c92f3 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c @@ -8,6 +8,9 @@ #include #endif +#define INSN_NAME vaddhn +#define TEST_MSG "VADDHN" + /* Expected results. */ VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32 }; @@ -52,56 +55,4 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, 0x, 0x }; -#define INSN_NAME vaddhn -#define TEST_MSG "VADDHN" - -#define FNNAME1(NAME) exec_ ## NAME -#define FNNAME(NAME) FNNAME1(NAME) - -void FNNAME (INSN_NAME) (void) -{ - /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. 
*/ -#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ - VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ -VECT_VAR(vector2, T1, W, N)); \ - vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) - -#define TEST_VADDHN(INSN, T1, T2, W, W2, N)\ - TEST_VADDHN1(INSN, T1, T2, W, W2, N) - - DECL_VARIABLE_64BITS_VARIANTS(vector64); - DECL_VARIABLE_128BITS_VARIANTS(vector1); - DECL_VARIABLE_128BITS_VARIANTS(vector2); - - clean_results (); - - /* Fill input vector1 and vector2 with arbitrary values */ - VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); - VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); - VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); - VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); - VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); - VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); - - VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); - VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX); - VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); - VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); - VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); - VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX); - - TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); - TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4); -
[[ARM/AArch64][testsuite] 12/36] Add vmlal_n and vmlsl_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc new file mode 100644 index 000..a968584 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc @@ -0,0 +1,61 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vmlxl_n(vector, vector2, val), + then store the result. */ +#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V)\ + TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + + /* Choose multiplier arbitrarily. 
*/ + TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11); + TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22); + TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33); + TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c new file mode 100644 index 000..118068c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c @@ -0,0 +1,14 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmlal_n +#define TEST_MSG "VMLAL_N" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x10df, 0x10e0 }; + +#include "vmlXl_n.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c new file mode 100644 index 000..a26c69f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c @@ -0,0 +1,18 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmlsl_n +#define TEST_MSG "VMLSL_N" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfa4b, 0xfa4c, + 0xfa4d, 0xfa4e }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf4a6, + 0xf4a7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xef01, 0xef02, +0xef03, 0xef04 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xef01, +0xef02 }; + +#include "vmlXl_n.inc" -- 2.1.0
[[ARM/AArch64][testsuite] 07/36] Add vmla_lane and vmls_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmls_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc new file mode 100644 index 000..b644a0e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc @@ -0,0 +1,91 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMLX_LANE(VAR)\ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmlx_lane(vector, vector2, vector3, lane), + then store the result. 
*/ +#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V) \ + TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V) + + DECL_VMLX_LANE(vector); + DECL_VMLX_LANE(vector2); + DECL_VMLX_LANE(vector_res); + + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector3, float, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + VDUP(vector2, , float, f, 32, 2, 55.3f); + VDUP(vector2, q, int, s, 16, 8, 0x55); + VDUP(vector2, q, int, s, 32, 4, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x55); + VDUP(vector2, q, uint, u, 32, 4, 0x55); + VDUP(vector2, q, float, f, 32, 4, 55.8f); + + VDUP(vector3, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0xBB); + VDUP(vector3, , float, f, 32, 2, 11.34f); + + /* Choose lane arbitrarily. 
*/ + TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c new file mode 100644 index 000..f4b89d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c @@ -0,0 +1,50 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmla +#define TEST_MSG "VMLA_LANE" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,32,2)
[[ARM/AArch64][testsuite] 13/36] Add vmla_n and vmls_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmla_n.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmls_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc new file mode 100644 index 000..34cdbe8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc @@ -0,0 +1,78 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMLX_N(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, float, 32, 4);\ + DECL_VARIABLE(VAR, uint, 32, 4) + + /* vector_res = vmlx_n(vector, vector2, val), + then store the result. 
*/ +#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N),\ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) + + DECL_VMLX_N(vector); + DECL_VMLX_N(vector2); + DECL_VMLX_N(vector_res); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + VDUP(vector2, , float, f, 32, 2, 55.2f); + VDUP(vector2, q, int, s, 16, 8, 0x55); + VDUP(vector2, q, int, s, 32, 4, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x55); + VDUP(vector2, q, uint, u, 32, 4, 0x55); + VDUP(vector2, q, float, f, 32, 4, 55.9f); + + /* Choose multiplier arbitrarily. 
*/ + TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 0x11); + TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 0x22); + TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 0x33); + TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 0x44); + TEST_VMLX_N(INSN_NAME, , float, f, 32, 2, 22.3f); + TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 0x55); + TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 0x66); + TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 0x77); + TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 0x88); + TEST_VMLX_N(INSN_NAME, q, float, f, 32, 4, 66.7f); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_n.c new file mode 100644 index 000..8376fe1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_n.c @@ -0,0 +1,50 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmla +#define TEST_MSG "VMLA_N" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1684, 0x1685 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4497deb8, 0x4497feb8 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,
[[ARM/AArch64][testsuite] 14/36] Add vqdmlal and vqdmlsl tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc new file mode 100644 index 000..cd61fd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc @@ -0,0 +1,63 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ +vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ +CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMLXL(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMLXL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4, expected_cumulative_sat, ""); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2, expected_cumulative_sat, ""); + + 
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + + VDUP(vector3, , int, s, 16, 4, 0x8000); + VDUP(vector4, , int, s, 16, 4, 0x8000); + VDUP(vector3, , int, s, 32, 2, 0x8000); + VDUP(vector4, , int, s, 32, 2, 0x8000); + +#define TEST_MSG2 "with saturation" + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c new file mode 100644 index 000..c53a90a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c @@ -0,0 +1,27 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vqdmlal +#define TEST_MSG "VQDMLAL" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,64,2) = 1; + +/* Expected results when saturation occurs. 
*/ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fef, 0x7ff0, +0x7ff1, 0x7ff2 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fef, +0x7ff0 }; + +#include "vqdmlXl.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c new file mode 100644 index 000..56e0b61 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c @@ -0,0 +1,29 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vqdmlsl +#define TEST_MSG "VQDMLSL" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x83c2, 0x83c3, + 0x83c4, 0x83c5 }; +VECT_VAR_DECL(expected,int,64,2) [] =
[[ARM/AArch64][testsuite] 15/36] Add vqdmlal_lane and vqdmlsl_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc new file mode 100644 index 000..705f90a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc @@ -0,0 +1,73 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vqdmlXl_lane(vector, vector3, vector4, lane), + then store the result. */ +#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N),\ + VECT_VAR(vector_res, T1, W, N));\ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMLXL_LANE(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0, expected_cumulative_sat, 
""); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0, expected_cumulative_sat, ""); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + +#define TEST_MSG2 "(mul with input=0)" + VDUP(vector3, , int, s, 16, 4, 0); + VDUP(vector3, , int, s, 32, 2, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); + +#define TEST_MSG3 "(mul with saturation)" + VDUP(vector3, , int, s, 16, 4, 0x8000); + VDUP(vector3, , int, s, 32, 2, 0x8000); + VDUP(vector4, , int, s, 16, 4, 0x8000); + VDUP(vector4, , int, s, 32, 2, 0x8000); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0, expected_cumulative_sat3, TEST_MSG3); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0, expected_cumulative_sat3, TEST_MSG3); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected3, TEST_MSG3); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected3, TEST_MSG3); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c new file mode 100644 index 000..832a705 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c @@ -0,0 +1,38 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vqdmlal_lane +#define TEST_MSG "VQDMLAL_LANE" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f }; + +/* Expected values of cumulative_saturation flag when multiplying with + 0. */ +int VECT_VAR(expected_cumulative_sat2,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat2,int,64,2) = 0; + +/* Expected values when multiplying with 0. */ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfff0, 0xfff1, +0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfff0, +0xfff1 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. */ +int VECT_VAR(expected_cumulative_sat3,int,32,4) = 1; +int VECT
[[ARM/AArch64][testsuite] 16/36] Add vqdmlal_n and vqdmlsl_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc new file mode 100644 index 000..fd885dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc @@ -0,0 +1,59 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vqdmlxl_n(vector, vector3, val), + then store the result. */ +#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N),\ + VECT_VAR(vector_res, T1, W, N));\ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMLXL_N(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector3, , int, s, 32, 2, 0x55); + + /* Choose val arbitrarily. 
*/ + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x22, expected_cumulative_sat, ""); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x33, expected_cumulative_sat, ""); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + +#define TEST_MSG2 "(check mul cumulative saturation)" + VDUP(vector3, , int, s, 16, 4, 0x8000); + VDUP(vector3, , int, s, 32, 2, 0x8000); + + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x8000, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c new file mode 100644 index 000..b84bca3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c @@ -0,0 +1,27 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vqdmlal_n +#define TEST_MSG "VQDMLAL_N" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x1684, 0x1685, 0x1686, 0x1687 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x21ce, 0x21cf }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,64,2) = 1; + +/* Expected results when saturation occurs. 
*/ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fef, 0x7ff0, +0x7ff1, 0x7ff2 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fef, +0x7ff0 }; + +#include "vqdmlXl_n.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c new file mode 100644 index 000..ff8d9d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c @@ -0,0 +1,29 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vqdmlsl_n +#define TEST_MSG "VQDMLSL_N" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0xe95c, 0xe95d, + 0xe95e, 0xe95f }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xde12, + 0xde13 }; + +/* Expected va
[[ARM/AArch64][testsuite] 19/36] Add vsubl tests, put most of the code in common with vaddl in vXXXl.inc.
* gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vsubl.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vaddl.c: Use code from vXXXl.inc. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc new file mode 100644 index 000..bd4c8fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc @@ -0,0 +1,70 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: y=vaddl(x1,x2), then store the result. */ +#define TEST_VADDL1(INSN, T1, T2, W, W2, N)\ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N));\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VADDL(INSN, T1, T2, W, W2, N) \ + TEST_VADDL1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer". 
*/ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + + /* Choose init value arbitrarily. */ + VDUP(vector2, , int, s, 8, 8, -13); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -16); + VDUP(vector2, , uint, u, 8, 8, 0xf3); + VDUP(vector2, , uint, u, 16, 4, 0xfff1); + VDUP(vector2, , uint, u, 32, 2, 0xfff0); + + /* Execute the tests. */ + TEST_VADDL(INSN_NAME, int, s, 8, 16, 8); + TEST_VADDL(INSN_NAME, int, s, 16, 32, 4); + TEST_VADDL(INSN_NAME, int, s, 32, 64, 2); + TEST_VADDL(INSN_NAME, uint, u, 8, 16, 8); + TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c index 030785d..020d9f8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c @@ -2,6 +2,9 @@ #include "arm-neon-ref.h" #include "compute-ref-data.h" +#define INSN_NAME vaddl +#define TEST_MSG "VADDL" + /* Expected results. */ VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; @@ -45,76 +48,4 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, 0x, 0x }; -#define INSN_NAME vaddl -#define TEST_MSG "VADDL" - -#define FNNAME1(NAME) exec_ ## NAME -#define FNNAME(NAME) FNNAME1(NAME) - -void FNNAME (INSN_NAME) (void) -{ - /* Basic test: y=vaddl(x1,x2), then store the result. 
*/ -#define TEST_VADDL1(INSN, T1, T2, W, W2, N)\ - VECT_VAR(vector_res, T1, W2, N) =\ -INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N));\ - vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) - -#define TEST_VADDL(INSN, T1, T2, W, W2, N) \ - TEST_VADDL1(INSN, T1, T2, W, W2, N) - - DECL_VARIABLE(vector, int, 8, 8); - DECL_VARIABLE(vector, int, 16, 4); - DECL_VARIABLE(vector, int, 32, 2); - DECL_VARIABLE(vector, uint, 8, 8); - DECL_VARIABLE(vector, uint, 16, 4); - DECL_VARIABLE(vector, uint, 32, 2); - - DECL_VARIABLE(vector2, int, 8, 8); - DECL_VARIABLE(vector2, int, 16, 4); - DECL_VARIABLE(vector2, int, 32, 2); - DECL_VARIABLE(vector2, uint, 8, 8); - DECL_VARIABLE(vector2, uint, 16, 4); - DECL_VARIABLE(vector2, uint, 32, 2); - - DECL_VARIABLE(vector_res, int, 16, 8); - DECL_VARIABLE(vector_res, int, 32, 4); - DECL_VARIABLE(vector_res, int,
[[ARM/AArch64][testsuite] 18/36] Add vsli_n and vsri_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vsli_n.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vsri_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc new file mode 100644 index 000..32c0d86 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc @@ -0,0 +1,82 @@ +#define FNNAME1(NAME) exec_ ## NAME ##_n +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vsxi_n(vector, vector2, val), + then store the result. */ +#define TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),\ + VECT_VAR(vector2, T1, W, N),\ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VSXI_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values. 
*/ + VDUP(vector2, , int, s, 8, 8, 2); + VDUP(vector2, , int, s, 16, 4, -4); + VDUP(vector2, , int, s, 32, 2, 3); + VDUP(vector2, , int, s, 64, 1, 100); + VDUP(vector2, , uint, u, 8, 8, 20); + VDUP(vector2, , uint, u, 16, 4, 30); + VDUP(vector2, , uint, u, 32, 2, 40); + VDUP(vector2, , uint, u, 64, 1, 2); + VDUP(vector2, , poly, p, 8, 8, 20); + VDUP(vector2, , poly, p, 16, 4, 30); + VDUP(vector2, q, int, s, 8, 16, -10); + VDUP(vector2, q, int, s, 16, 8, -20); + VDUP(vector2, q, int, s, 32, 4, -30); + VDUP(vector2, q, int, s, 64, 2, 24); + VDUP(vector2, q, uint, u, 8, 16, 12); + VDUP(vector2, q, uint, u, 16, 8, 3); + VDUP(vector2, q, uint, u, 32, 4, 55); + VDUP(vector2, q, uint, u, 64, 2, 3); + VDUP(vector2, q, poly, p, 8, 16, 12); + VDUP(vector2, q, poly, p, 16, 8, 3); + + /* Choose shift amount arbitrarily. */ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 4); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 3); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 1); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 32); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 2); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 10); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 30); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 3); + TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 2); + TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 10); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 5); + TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 3); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 20); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 16); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 12); + TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 23); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 53); + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 12); + + CHECK_RESULTS (TEST_MSG, ""); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c new file mode 100644 index 000..342aec5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c @@ -0,0 +1,162 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vsli +#define TEST_MSG "VSLI_N" + +/* Extra tests for functions requiring corner cases tests. */ +void vsli_extra(void); +#define EXTRA_TESTS vsli_extra + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe1, 0xffe2, 0xffe3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x64fff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x50, 0x51, 0x52, 0x53, + 0x50, 0x51, 0x52, 0x53 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3ff0, 0x3ff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x10 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x50, 0x51, 0x52, 0x53, + 0x50, 0x51, 0x52, 0x53 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,i
[[ARM/AArch64][testsuite] 17/36] Add vpadd, vpmax and vpmin tests.
* gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vpadd.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vpmax.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vpmin.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc new file mode 100644 index 000..7ac2ed4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc @@ -0,0 +1,67 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_VPADD1(INSN, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ +VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VPADD(INSN, T1, T2, W, N) \ + TEST_VPADD1(INSN, T1, T2, W, N) \ + + /* No need for 64 bits variants. */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer". 
*/ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + + /* Apply a unary operator named INSN_NAME. */ + TEST_VPADD(INSN_NAME, int, s, 8, 8); + TEST_VPADD(INSN_NAME, int, s, 16, 4); + TEST_VPADD(INSN_NAME, int, s, 32, 2); + TEST_VPADD(INSN_NAME, uint, u, 8, 8); + TEST_VPADD(INSN_NAME, uint, u, 16, 4); + TEST_VPADD(INSN_NAME, uint, u, 32, 2); + TEST_VPADD(INSN_NAME, float, f, 32, 2); + + CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c new file mode 100644 index 000..5ddfd3d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c @@ -0,0 +1,19 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vpadd +#define TEST_MSG "VPADD" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, + 0xe1, 0xe5, 0xe9, 0xed }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffe1, 0xffe1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, + 0xe1, 0xe5, 0xe9, 0xed }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffe1, 0xffe1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1f8, 0xc1f8 }; + +#include "vpXXX.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c new file mode 100644 index 000..f27a9a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c @@ -0,0 +1,20 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + + +#define INSN_NAME vpmax +#define TEST_MSG "VPMAX" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf1, 0xf3, 0xf5, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf1, 0xf3, 0xf5, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0x
[[ARM/AArch64][testsuite] 20/36] Add vsubw tests, putting most of the code in common with vaddw through vXXXw.inc
* gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vsubw.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vaddw.c: Use code from vXXXw.inc. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc new file mode 100644 index 000..c535557 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc @@ -0,0 +1,70 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: y=vaddw(x1,x2), then store the result. */ +#define TEST_VADDW1(INSN, T1, T2, W, W2, N)\ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_##T2##W(VECT_VAR(vector, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N));\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VADDW(INSN, T1, T2, W, W2, N) \ + TEST_VADDW1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer". 
*/ + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose init value arbitrarily. */ + VDUP(vector2, , int, s, 8, 8, -13); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -16); + VDUP(vector2, , uint, u, 8, 8, 0xf3); + VDUP(vector2, , uint, u, 16, 4, 0xfff1); + VDUP(vector2, , uint, u, 32, 2, 0xfff0); + + /* Execute the tests. */ + TEST_VADDW(INSN_NAME, int, s, 8, 16, 8); + TEST_VADDW(INSN_NAME, int, s, 16, 32, 4); + TEST_VADDW(INSN_NAME, int, s, 32, 64, 2); + TEST_VADDW(INSN_NAME, uint, u, 8, 16, 8); + TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c index 95cbb31..27f54f6 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c @@ -2,6 +2,9 @@ #include "arm-neon-ref.h" #include "compute-ref-data.h" +#define INSN_NAME vaddw +#define TEST_MSG "VADDW" + /* Expected results. */ VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; @@ -45,76 +48,4 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, 0x, 0x }; -#define INSN_NAME vaddw -#define TEST_MSG "VADDW" - -#define FNNAME1(NAME) exec_ ## NAME -#define FNNAME(NAME) FNNAME1(NAME) - -void FNNAME (INSN_NAME) (void) -{ - /* Basic test: y=vaddw(x1,x2), then store the result. 
*/ -#define TEST_VADDW1(INSN, T1, T2, W, W2, N)\ - VECT_VAR(vector_res, T1, W2, N) =\ -INSN##_##T2##W(VECT_VAR(vector, T1, W2, N), \ - VECT_VAR(vector2, T1, W, N));\ - vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) - -#define TEST_VADDW(INSN, T1, T2, W, W2, N) \ - TEST_VADDW1(INSN, T1, T2, W, W2, N) - - DECL_VARIABLE(vector, int, 16, 8); - DECL_VARIABLE(vector, int, 32, 4); - DECL_VARIABLE(vector, int, 64, 2); - DECL_VARIABLE(vector, uint, 16, 8); - DECL_VARIABLE(vector, uint, 32, 4); - DECL_VARIABLE(vector, uint, 64, 2); - - DECL_VARIABLE(vector2, int, 8, 8); - DECL_VARIABLE(vector2, int, 16, 4); - DECL_VARIABLE(vector2, int, 32, 2); - DECL_VARIABLE(vector2, uint, 8, 8); - DECL_VARIABLE(vector2, uint, 16, 4); - DECL_VARIABLE(vector2, uint, 32, 2); - - DECL_VARIABLE(vector_res, int, 16, 8); - DECL_VARIABLE(vector_res, int, 32, 4); -
[[ARM/AArch64][testsuite] 21/36] Add vmovl tests.
* gcc.target/aarch64/advsimd-intrinsics/vmovl.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovl.c new file mode 100644 index 000..427c9ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovl.c @@ -0,0 +1,77 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff0, 0xfff1, + 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff0, + 0xfff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, +0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 
0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, + 0x, 0x }; + +#define TEST_MSG "VMOVL" +void exec_vmovl (void) +{ + /* Basic test: vec128=vmovl(vec64), then store the result. */ +#define TEST_VMOVL(T1, T2, W, W2, N) \ + VECT_VAR(vector128, T1, W2, N) = \ +vmovl_##T2##W(VECT_VAR(vector64, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector128, T1, W2, N)) + + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64, buffer); + + clean_results (); + + TEST_VMOVL(int, s, 8, 16, 8); + TEST_VMOVL(int, s, 16, 32, 4); + TEST_VMOVL(int, s, 32, 64, 2); + TEST_VMOVL(uint, u, 8, 16, 8); + TEST_VMOVL(uint, u, 16, 32, 4); + TEST_VMOVL(uint, u, 32, 64, 2); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + exec_vmovl (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 22/36] Add vmovn tests.
* gcc.target/aarch64/advsimd-intrinsics/vmovn.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c new file mode 100644 index 000..bc2c2ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c @@ -0,0 +1,50 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff0, 0xfff1 }; + +#define TEST_MSG "VMOVN" +void exec_vmovn (void) +{ + /* Basic test: vec64=vmovn(vec128), then store the result. */ +#define TEST_VMOVN(T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = \ +vmovn_##T2##W(VECT_VAR(vector128, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); + + clean_results (); + + TEST_VMOVN(int, s, 16, 8, 8); + TEST_VMOVN(int, s, 32, 16, 4); + TEST_VMOVN(int, s, 64, 32, 2); + TEST_VMOVN(uint, u, 16, 8, 8); + TEST_VMOVN(uint, u, 32, 16, 4); + TEST_VMOVN(uint, u, 64, 32, 2); + + CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); +} + +int main (void) +{ + exec_vmovn (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 23/36] Add vmul_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c new file mode 100644 index 000..978cd9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c @@ -0,0 +1,104 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfde0, 0xfe02 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xace0, 0xb212 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b6, 0xc3ab }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, + 0xffd0, 0xffd4, 0xffd8, 0xffdc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfde0, 0xfe02, + 0xfe24, 0xfe46 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, +0xccd0, 0xd114, 0xd558, 0xd99c }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xace0, 0xb212, +0xb744, 0xbc76 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b6, 0xc3ab, + 0xc39f, 0xc394 }; + +#define TEST_MSG "VMUL_LANE" +void exec_vmul_lane (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + + clean_results (); + + /* Initialize vector from pre-initialized values. */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x22); + VDUP(vector2, , uint, u, 16, 4, 0x444); + VDUP(vector2, , uint, u, 32, 2, 0x532); + VDUP(vector2, , float, f, 32, 2, 22.8f); + + /* Choose lane arbitrarily. 
*/ + TEST_VMUL_LANE(, int, s, 16, 4, 4, 2); + TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); + TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); + TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); + TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); + TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); + TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); + TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); + TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); + TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); + + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); +} + +int main (void) +{ + exec_vmul_lane (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 24/36] Add vmul_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmul_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c new file mode 100644 index 000..be0ee65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c @@ -0,0 +1,96 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfde0, 0xfe02 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfbc0, 0xfc04 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b2, 0xc3a74000 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, + 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf9a0, 0xfa06, + 0xfa6c, 0xfad2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, +0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf780, 0xf808, +0xf890, 0xf918 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, + 0xc49b9333, 0xc4907667 }; + +#define INSN_NAME vmul_n +#define TEST_MSG "VMUL_N" + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_n(vector,val), then store the result. 
*/ +#define TEST_VMUL_N(Q, T1, T2, W, N, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + clean_results (); + + /* Initialize vector from pre-initialized values. */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose multiplier arbitrarily. */ + TEST_VMUL_N(, int, s, 16, 4, 0x11); + TEST_VMUL_N(, int, s, 32, 2, 0x22); + TEST_VMUL_N(, uint, u, 16, 4, 0x33); + TEST_VMUL_N(, uint, u, 32, 2, 0x44); + TEST_VMUL_N(, float, f, 32, 2, 22.3f); + TEST_VMUL_N(q, int, s, 16, 8, 0x55); + TEST_VMUL_N(q, int, s, 32, 4, 0x66); + TEST_VMUL_N(q, uint, u, 16, 8, 0x77); + TEST_VMUL_N(q, uint, u, 32, 4, 0x88); + TEST_VMUL_N(q, float, f, 32, 4, 88.9f); + + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 25/36] Add vmull tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c new file mode 100644 index 000..3fdd51e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c @@ -0,0 +1,75 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,8) [] = { 0x100, 0xe1, 0xc4, 0xa9, + 0x90, 0x79, 0x64, 0x51 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x100, 0xe1, 0xc4, 0xa9 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x100, 0xe1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe100, 0xe2e1, 0xe4c4, 0xe6a9, +0xe890, 0xea79, 0xec64, 0xee51 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe00100, 0xffe200e1, +0xffe400c4, 0xffe600a9 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffe00100, +0xffe200e1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x5500, 0x5501, 0x5504, 0x5505, +0x5510, 0x5511, 0x5514, 0x5515 }; + +#define TEST_MSG "VMULL" +void exec_vmull (void) +{ + /* Basic test: y=vmull(x,x), then store the result. 
*/ +#define TEST_VMULL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) =\ +vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + DECL_VARIABLE(vector_res, poly, 16, 8); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , poly, p, 8, 8); + + TEST_VMULL(int, s, 8, 16, 8); + TEST_VMULL(int, s, 16, 32, 4); + TEST_VMULL(int, s, 32, 64, 2); + TEST_VMULL(uint, u, 8, 16, 8); + TEST_VMULL(uint, u, 16, 32, 4); + TEST_VMULL(uint, u, 32, 64, 2); + TEST_VMULL(poly, p, 8, 16, 8); + + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); +} + +int main (void) +{ + exec_vmull (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 02/36] Be more verbose, and actually confirm that a test was checked.
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (CHECK): Add trace. (CHECK_FP): Likewise. (CHECK_CUMULATIVE_SAT): Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 6464c66..2730a66 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -79,6 +79,7 @@ extern size_t strlen(const char *); abort(); \ } \ } \ +fprintf(stderr, "CHECKED %s\n", MSG); \ } /* Floating-point variant. */ @@ -107,6 +108,7 @@ extern size_t strlen(const char *); abort(); \ } \ } \ +fprintf(stderr, "CHECKED %s\n", MSG); \ } /* Clean buffer with a non-zero pattern to help diagnose buffer @@ -323,6 +325,7 @@ extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2); strlen(COMMENT) > 0 ? " " COMMENT : ""); \ abort(); \ } \ +fprintf(stderr, "CHECKED CUMULATIVE SAT %s\n", MSG); \ } #define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment) \ -- 2.1.0
[[ARM/AArch64][testsuite] 26/36] Add vmull_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c new file mode 100644 index 000..d3aa879 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c @@ -0,0 +1,66 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000, 0x2000 }; + +#define TEST_MSG "VMULL_LANE" +void exec_vmull_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. */ +#define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) =\ +vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector2, T1, W, N), \ +L);\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + VDUP(vector, , uint, u, 16, 4, 0x1000); + VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2. 
*/ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + VDUP(vector2, , uint, u, 16, 4, 0x4); + VDUP(vector2, , uint, u, 32, 2, 0x2); + + /* Choose lane arbitrarily. */ + TEST_VMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VMULL_LANE(int, s, 32, 64, 2, 1); + TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); + TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ""); +} + +int main (void) +{ + exec_vmull_lane (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 30/36] Add vpaddl tests.
* gcc.target/aarch64/advsimd-intrinsics/vpaddl.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpaddl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpaddl.c new file mode 100644 index 000..779cc77 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpaddl.c @@ -0,0 +1,129 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffe1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffe1, 0x1ffe5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffe1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed, + 0xfff1, 0xfff5, 0xfff9, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffe1, 0xffe5, + 0xffe9, 0xffed }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffe1, + 0xffe5 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed, +0x1f1, 0x1f5, 0x1f9, 0x1fd }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe5, 0x1ffe9, 0x1ffed }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffe1, 0x1ffe5 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, + 0x, 0x }; + +#define INSN_NAME vpaddl +#define TEST_MSG "VPADDL/VPADDLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2)\ + VECT_VAR(vector_res, T1, W2, N2) = \ +INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADDL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* No need for 64 bits variants. */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer". 
*/ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector,
[[ARM/AArch64][testsuite] 28/36] Add vmvn tests.
* gcc.target/aarch64/advsimd-intrinsics/vmvn.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c new file mode 100644 index 000..04bb5f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c @@ -0,0 +1,136 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf, 0xe, 0xd, 0xc, + 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf, 0xe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf, 0xe, 0xd, 0xc, + 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf, 0xe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf, 0xe, 0xd, 0xc, + 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0xe, 0xd, 0xc, + 0xb, 0xa, 0x9, 0x8, + 0x7, 0x6, 0x5, 0x4, + 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf, 0xe, 0xd, 0xc, + 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x, + 0x }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf, 0xe, 0xd, 0xc, +0xb, 0xa, 0x9, 0x8, +0x7, 0x6, 0x5, 0x4, +0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf, 0xe, 0xd, 0xc, +0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x, +0x }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf, 0xe, 0xd, 0xc, +0xb, 0xa, 0x9, 0x8, +0x7, 0x6, 0x5, 0x4, +0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0x, 0x }; 
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, + 0x, 0x }; + +#define INSN_NAME vmvn +#define TEST_MSG "VMVN/VMVNQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* No need for 64 bits variants. */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, poly, 8, 16); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, poly, 8, 16); + + clean_results (); + + /* Initialize input "vector" from "buffer". */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buf
[[ARM/AArch64][testsuite] 32/36] Add vqdmulh_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c new file mode 100644 index 000..5260676 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c @@ -0,0 +1,121 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x, 0x, 0x, 0x, + 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x, 0x, + 0x, 0x }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat2,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, + 0x7fff, 0x7fff }; + +#define INSN_NAME vqdmulh +#define TEST_MSG "VQDMULH_LANE" +#define FNNAME1(NAME) exec_ ## NAME ## _lane +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N));\ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. */ +#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULH_LANE1(INSN_NAME, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type. */ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily. 
*/ + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, ""); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, ""); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, ""); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, ""); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, ""); + + /* Choose input values to trigger saturation. */ + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4,
[[ARM/AArch64][testsuite] 33/36] Add vqdmulh_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c new file mode 100644 index 000..ab66e2d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c @@ -0,0 +1,110 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0x19, 0x19, 0x19, 0x19 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x4, 0x4 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xa, 0xa, 0xa, 0xa }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat2,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, +0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, +0x7fff, 0x7fff }; + +#define INSN_NAME vqdmulh +#define TEST_MSG "VQDMULH_N" +#define FNNAME1(NAME) exec_ ## NAME ## _n +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* vector_res = vqdmulh_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),\ + VECT_VAR(vector_res, T1, W, N));\ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. */ +#define TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULH_N1(INSN_NAME, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x100023); + VDUP(vector, q, int, s, 16, 8, 0x1000); + VDUP(vector, q, int, s, 32, 4, 0x100045); + + /* Choose multiplier arbitrarily. */ + TEST_VQDMULH_N(, int, s, 16, 4, 0xCF, expected_cumulative_sat, ""); + TEST_VQDMULH_N(, int, s, 32, 2, 0x2344, expected_cumulative_sat, ""); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x80, expected_cumulative_sat, ""); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x5422, expected_cumulative_sat, ""); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, ""); + + /* Choose input values to trigger saturation. 
*/ + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x8000); + +#define TEST_MSG2 " (check mul cumulative saturation)" + TEST_VQDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULH_N(, int, s, 32, 2, 0x8000, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected2, TEST_MSG2); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected2, TEST_MSG2); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected2, TEST_MSG2); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2,
[[ARM/AArch64][testsuite] 29/36] Add vpadal tests.
* gcc.target/aarch64/advsimd-intrinsics/vpadal.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadal.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadal.c new file mode 100644 index 000..dcedb45 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadal.c @@ -0,0 +1,155 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffd1, 0xffd6 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffd1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffd1, 0x1ffd6 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffd1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0, + 0xffe5, 0xffea, 0xffef, 0xfff4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffd1, 0xffd6, + 0xffdb, 0xffe0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffd1, 0xffd6 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0, +0x1e5, 0x1ea, 0x1ef, 0x1f4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffd1, 0x1ffd6, 0x1ffdb, 0x1ffe0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffd1, 0x1ffd6 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33, +0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, +0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, + 0x, 0x }; + +#define INSN_NAME vpadal +#define TEST_MSG "VPADAL/VPADALQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2)\ + VECT_VAR(vector_res, T1, W2, N2) = \ +INSN##Q##_##T2##W(VECT_VAR(vector, T1, W2, N2), VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ +VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADAL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* No need for 64 bits variants. */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 64, 1); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 64, 1); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + 
DECL_VARIABLE(vector_res, uint, 32, 2); + D
[[ARM/AArch64][testsuite] 31/36] Add vqdmulh tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c new file mode 100644 index 000..8d2a365 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c @@ -0,0 +1,122 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x, 0x }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x, 0x, 0x, 0x, + 0x, 0x, 0x, 0x }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x, 0x, + 0x, 0x }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat2,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, + 0x7fff, 0x7fff }; + +#define INSN_NAME vqdmulh +#define TEST_MSG "VQDMULH" + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vqdmulh(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N));\ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. */ +#define TEST_VQDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)\ + TEST_VQDMULH1(INSN_NAME, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. 
*/ + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0xBB); + VDUP(vector2, q, int, s, 16, 8, 0x33); + VDUP(vector2, q, int, s, 32, 4, 0x22); + + TEST_VQDMULH(, int, s, 16, 4, expected_cumulative_sat, ""); + TEST_VQDMULH(, int, s, 32, 2, expected_cumulative_sat, ""); + TEST_VQDMULH(q, int, s, 16, 8, expected_cumulative_sat, ""); + TEST_VQDMULH(q, int, s, 32, 4, expected_cumulative_sat, ""); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, ""); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, ""); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x8000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector2, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x8000); + VDUP(vector2, q, int, s, 32, 4, 0x8000); + +#define TEST_MSG2 "with saturation" + TEST_VQDMULH(, int, s, 16, 4, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULH(, int, s, 32, 2, expected_cu
[[ARM/AArch64][testsuite] 36/36] Add vqdmull_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c new file mode 100644 index 000..9e73009 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c @@ -0,0 +1,92 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x44000, 0x44000, + 0x44000, 0x44000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xaa000, 0xaa000 }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, +0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fff, +0x7fff }; + +#define INSN_NAME vqdmull +#define TEST_MSG "VQDMULL_N" + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* vector_res = vqdmull_n(vector,val), then store the result. */ +#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ +L);\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ +VECT_VAR(vector_res, T1, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. 
*/ +#define TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULL_N(T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_N1(INSN_NAME, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose multiplier arbitrarily. */ + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x22, expected_cumulative_sat, ""); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x55, expected_cumulative_sat, ""); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + +#define TEST_MSG2 "with saturation" + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x8000, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 34/36] Add vqdmull tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmull.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c new file mode 100644 index 000..e71a624 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c @@ -0,0 +1,86 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x200, 0x1c2, 0x188, 0x152 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x200, 0x1c2 }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, +0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fff, +0x7fff }; + +#define INSN_NAME vqdmull +#define TEST_MSG "VQDMULL" + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: y=vqdmull(x,x), then store the result. */ +#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N));\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ +VECT_VAR(vector_res, T1, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. 
*/ +#define TEST_VQDMULL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL2(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULL(T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL1(INSN_NAME, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector2, buffer, , int, s, 16, 4); + VLOAD(vector2, buffer, , int, s, 32, 2); + + TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat, ""); + TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat, ""); + + CHECK (TEST_MSG, int, 32, 4, PRIx16, expected, ""); + CHECK (TEST_MSG, int, 64, 2, PRIx32, expected, ""); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x8000); + +#define TEST_MSG2 "with saturation" + TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat2, TEST_MSG2); + + CHECK (TEST_MSG, int, 32, 4, PRIx16, expected2, TEST_MSG2); + CHECK (TEST_MSG, int, 64, 2, PRIx32, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 35/36] Add vqdmull_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c new file mode 100644 index 000..12f2a6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c @@ -0,0 +1,94 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000, 0x4000 }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, +0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fff, +0x7fff }; + +#define INSN_NAME vqdmull +#define TEST_MSG "VQDMULL_LANE" + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N),\ + VECT_VAR(vector2, T1, W, N),\ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ +VECT_VAR(vector_res, T1, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. 
*/ +#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_LANE1(INSN_NAME, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose lane arbitrarily. */ + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat, ""); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat, ""); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x8000); + +#define TEST_MSG2 "with saturation" + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} -- 2.1.0
Re: [PATCH] PR59448 - Promote consume to acquire
On Tue, 2015-01-13 at 09:56 -0500, Andrew MacLeod wrote: > The problem with the patch in the PR is the memory model is immediately > promoted from consume to acquire. This happens *before* any of the > memmodel checks are made. If a consume is illegally specified (such as > in a compare_exchange), it gets promoted to acquire and the compiler > doesn't report the error because it never sees the consume. The only issue I can think of in compare_exchange is if the program specifies memory_order_consume for the success path but memory_order_acquire for the failure path, which is disallowed by the standard. However, I don't see a reason why the standard's requirement is anything but a performance check in our particular case. The only case we prevent the compiler from reporting is a consume-on-success / acquire-on-failure combination. But we upgrade the former to acquire, so we can't even cause libatomic (or similar) to issue too weak barriers due to libatomic relying on the standard's requirement. Thus, if there's no easy way to upgrade to acquire after the sanity checks, I think this isn't critical enough to hold up the patch from being committed. memory_order_consume is clearly a feature for experts.
[[ARM/AArch64][testsuite] 08/36] Add vtrn tests. Refactor vuzp and vzip tests.
* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vtrn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Use code from vshuffle.inc. * gcc.target/aarch64/advsimd-intrinsics/vzip.c: Use code from vshuffle.inc. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc new file mode 100644 index 000..928f338 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc @@ -0,0 +1,139 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* In this case, output variables are arrays of vectors. */ +#define DECL_VSHUFFLE(T1, W, N) \ + VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2); \ + VECT_VAR_DECL(result_bis, T1, W, N)[2 * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VSHUFFLE(INSN, Q, T1, T2, W, N) \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2) =\ +INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N),\ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N), \ +sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[X]. */ +#define TEST_EXTRA_CHUNK(T1, W, N, X) \ + memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \ +sizeof(VECT_VAR(result, T1, W, N))); + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + + /* We don't need 64 bits variants. 
*/ +#define DECL_ALL_VSHUFFLE()\ + DECL_VSHUFFLE(int, 8, 8);\ + DECL_VSHUFFLE(int, 16, 4); \ + DECL_VSHUFFLE(int, 32, 2); \ + DECL_VSHUFFLE(uint, 8, 8); \ + DECL_VSHUFFLE(uint, 16, 4); \ + DECL_VSHUFFLE(uint, 32, 2); \ + DECL_VSHUFFLE(poly, 8, 8); \ + DECL_VSHUFFLE(poly, 16, 4); \ + DECL_VSHUFFLE(float, 32, 2); \ + DECL_VSHUFFLE(int, 8, 16); \ + DECL_VSHUFFLE(int, 16, 8); \ + DECL_VSHUFFLE(int, 32, 4); \ + DECL_VSHUFFLE(uint, 8, 16); \ + DECL_VSHUFFLE(uint, 16, 8); \ + DECL_VSHUFFLE(uint, 32, 4); \ + DECL_VSHUFFLE(poly, 8, 16); \ + DECL_VSHUFFLE(poly, 16, 8); \ + DECL_VSHUFFLE(float, 32, 4) + + DECL_ALL_VSHUFFLE(); + + /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); + VLOAD(vector1, buffer, , float, f, 32, 2); + VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values. */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); + VDUP(vector2, q, float, f, 32, 4, 33.8f); + +#define TEST_ALL_VSHUFFLE(INSN)\ + TEST_VSHUFFLE(INSN, , int, s, 8, 8); \ + TEST_VSHUFFLE(INSN, , int, s, 16, 4);\ + TEST_VSHUFFLE(INSN, , int, s, 32, 2);\ + TEST_VSHUFFLE(INSN, , uint, u, 8, 8);\ + TEST_VSHUFFLE(INSN, , uint, u, 16, 4); \ + TEST_VSHUFFLE(INSN, , uint, u, 32, 2); \ + TEST_VSHUFFLE(INSN, , poly, p, 8, 8);\ + TEST_VSHUFFLE(INSN, , poly, p, 16, 4);
[[ARM/AArch64][testsuite] 06/36] Add vmla and vmls tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlX.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmla.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmls.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc new file mode 100644 index 000..1c8f1be --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc @@ -0,0 +1,110 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMLX(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector3, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = vmla(vector, vector3, vector4), + then store the result. */ +#define TEST_VMLX1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX(INSN, Q, T1, T2, W, N) \ + TEST_VMLX1(INSN, Q, T1, T2, W, N) + + DECL_VMLX(int, 8, 8); + DECL_VMLX(int, 16, 4); + DECL_VMLX(int, 32, 2); + DECL_VMLX(uint, 8, 8); + DECL_VMLX(uint, 16, 4); + DECL_VMLX(uint, 32, 2); + DECL_VMLX(float, 32, 2); + DECL_VMLX(int, 8, 16); + DECL_VMLX(int, 16, 8); + DECL_VMLX(int, 32, 4); + DECL_VMLX(uint, 8, 16); + DECL_VMLX(uint, 16, 8); + DECL_VMLX(uint, 32, 4); + DECL_VMLX(float, 32, 4); + + clean_results (); + + VLOAD(vector1, buffer, , int, s, 8, 8); + VLOAD(vector1, buffer, , int, s, 16, 4); + VLOAD(vector1, buffer, , int, s, 32, 2); + VLOAD(vector1, buffer, , uint, u, 8, 8); + VLOAD(vector1, buffer, , uint, u, 16, 4); + VLOAD(vector1, buffer, , uint, u, 32, 2); + VLOAD(vector1, buffer, , float, f, 32, 2); + VLOAD(vector1, buffer, q, int, s, 8, 16); + VLOAD(vector1, buffer, q, int, s, 16, 8); + VLOAD(vector1, buffer, q, int, s, 
32, 4); + VLOAD(vector1, buffer, q, uint, u, 8, 16); + VLOAD(vector1, buffer, q, uint, u, 16, 8); + VLOAD(vector1, buffer, q, uint, u, 32, 4); + VLOAD(vector1, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x44); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x66); + VDUP(vector2, , float, f, 32, 2, 33.1f); + VDUP(vector2, q, int, s, 8, 16, 0x77); + VDUP(vector2, q, int, s, 16, 8, 0x88); + VDUP(vector2, q, int, s, 32, 4, 0x99); + VDUP(vector2, q, uint, u, 8, 16, 0xAA); + VDUP(vector2, q, uint, u, 16, 8, 0xBB); + VDUP(vector2, q, uint, u, 32, 4, 0xCC); + VDUP(vector2, q, float, f, 32, 4, 99.2f); + + VDUP(vector3, , int, s, 8, 8, 0xFF); + VDUP(vector3, , int, s, 16, 4, 0xEE); + VDUP(vector3, , int, s, 32, 2, 0xDD); + VDUP(vector3, , uint, u, 8, 8, 0xCC); + VDUP(vector3, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0xAA); + VDUP(vector3, , float, f, 32, 2, 10.23f); + VDUP(vector3, q, int, s, 8, 16, 0x99); + VDUP(vector3, q, int, s, 16, 8, 0x88); + VDUP(vector3, q, int, s, 32, 4, 0x77); + VDUP(vector3, q, uint, u, 8, 16, 0x66); + VDUP(vector3, q, uint, u, 16, 8, 0x55); + VDUP(vector3, q, uint, u, 32, 4, 0x44); + VDUP(vector3, q, float, f, 32, 4, 77.8f); + + TEST_VMLX(INSN_NAME, , int, s, 8, 8); + TEST_VMLX(INSN_NAME, , int, s, 16, 4); + TEST_VMLX(INSN_NAME, , int, s, 32, 2); + TEST_VMLX(INSN_NAME, , uint, u, 8, 8); + TEST_VMLX(INSN_NAME, , uint, u, 16, 4); + TEST_VMLX(INSN_NAME, , uint, u, 32, 2); + TEST_VMLX(INSN_NAME, , float, f, 32, 2); + TEST_VMLX(INSN_NAME, q, int, s, 8, 16); + TEST_VMLX(INSN_NAME, q, int, s, 16, 8); + TEST_VMLX(INSN_NAME, q, int, s, 32, 4); + TEST_VMLX(INSN_NAME, q, uint, u, 8, 16); + TEST_VMLX(INSN_NAME, q, uint, u, 16, 8); + TEST_VMLX(INSN_NAME, q, uint, u, 32, 4); + TEST_VMLX(INSN_NAME, q, float, f, 32, 4); + + CHECK_RESULTS (TEST_MSG, ""); +} + +int main (void) +{ + 
FNNAME (INSN_NAME) (); + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c new file mode 100644 index 000..e3da60c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c @@ -0,0 +1,50 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmla +#define TEST_MSG "VMLA" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6 }; +VECT_VAR_DECL(exp
Re: [patch RFA libffi SH] Fix configure error for sh4-unknown-linux-gnu
On 01/12/2015 09:38 PM, Kaz Kojima wrote: > 2015-01-13 Kaz Kojima > > * configure.host: Remove extra brackets for sh. Ok, thanks. r~
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/2015 10:20 AM, Torvald Riegel wrote: On Tue, 2015-01-13 at 09:56 -0500, Andrew MacLeod wrote: The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. The only issue I can think of in compare_exchange is if the program specifies memory_order_consume for the success path but memory_order_acquire for the failure path, which is disallowed by the standard. However, I don't see a reason why the standard's requirement is anything but a performance check in our particular case. The only case we prevent the compiler from reporting is a consume-on-success / acquire-on-failure combination. But we upgrade the former to acquire, so we can't even cause libatomic (or similar) to issue too weak barriers due to libatomic relying on the standard's requirement. Thus, if there's no easy way to upgrade to acquire after the sanity checks, I think this isn't critical enough to hold up the patch from being committed. memory_order_consume is clearly a feature for experts. The error was actually in exchange... not compare_exchange like I wrote. and causes a testsuite error that specifically tests for an illegal consume. Andrew
Re: libffi is broken for x32
On Mon, Jan 12, 2015 at 5:13 PM, Richard Henderson wrote: > On 01/12/2015 04:57 PM, H.J. Lu wrote: >> The problem is my x86_64-*-linux-gnux32 patch >> >> https://gcc.gnu.org/ml/gcc-patches/2012-08/msg01083.html >> >> was never accepted upstream. Can I apply it to config.guess >> in GCC? > > Ah. Hmm. Perhaps the configure.host patch would be better after all. > Can I apply it to GCC trunk? Thanks. -- H.J.
[[ARM/AArch64][testsuite] 27/36] Add vmull_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_n.c new file mode 100644 index 000..df28a94 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_n.c @@ -0,0 +1,61 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x11000, 0x11000, 0x11000, 0x11000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x22000, 0x22000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33000, 0x33000, 0x33000, 0x33000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x44000, 0x44000 }; + +#define INSN_NAME vmull +#define TEST_MSG "VMULL_N" +void exec_vmull_n (void) +{ + int i; + + /* vector_res = vmull_n(vector,val), then store the result. */ +#define TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ +L);\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VMULL_N(INSN, T1, T2, W, W2, N, L)\ + TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + VDUP(vector, , uint, u, 16, 4, 0x1000); + VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Choose multiplier arbitrarily. 
*/ + TEST_VMULL_N(INSN_NAME, int, s, 16, 32, 4, 0x11); + TEST_VMULL_N(INSN_NAME, int, s, 32, 64, 2, 0x22); + TEST_VMULL_N(INSN_NAME, uint, u, 16, 32, 4, 0x33); + TEST_VMULL_N(INSN_NAME, uint, u, 32, 64, 2, 0x44); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); +} + +int main (void) +{ + exec_vmull_n (); + return 0; +} -- 2.1.0
Re: libffi is broken for x32
On 01/13/2015 07:35 AM, H.J. Lu wrote: > Can I apply it to GCC trunk? Please. r~
[PATCH][ARM][4.8]Backport "Fix definition of __ARM_SIZEOF_WCHAR_T"
Hi all, This is a backport patch for https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=213864 arm-none-eabi regression test has been done, no new issues. Okay for branch 4.8? gcc/ChangeLog Fix PR target/61413 Backport from mainline. 2014-08-12 Ramana Radhakrishnan PR target/61413 * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix definition of __ARM_SIZEOF_WCHAR_T. diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index c0f2184..2eb 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -74,8 +74,8 @@ extern char arm_arch_name[]; builtin_define_with_int_value (\ "__ARM_SIZEOF_MINIMAL_ENUM",\ flag_short_enums ? 1 : 4);\ - builtin_define_with_int_value (\ - "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ + builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ +wchar_type_node); \ if (TARGET_ARM_ARCH_PROFILE)\ builtin_define_with_int_value ( \ "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \
Re: [PATCH][ARM][4.8]Backport "Fix definition of __ARM_SIZEOF_WCHAR_T"
On 13/01/15 15:53, Renlin Li wrote: > Hi all, > > This is a backport patch for > https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=213864 > > arm-none-eabi regression test has been done, no new issues. > Okay for branch 4.8? > > gcc/ChangeLog > Fix PR target/61413 > Backport from mainline. > > 2014-08-12 Ramana Radhakrishnan > > PR target/61413 > * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix definition > of __ARM_SIZEOF_WCHAR_T. > > > > backport.patch > > > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h > index c0f2184..2eb 100644 > --- a/gcc/config/arm/arm.h > +++ b/gcc/config/arm/arm.h > @@ -74,8 +74,8 @@ extern char arm_arch_name[]; > builtin_define_with_int_value ( \ > "__ARM_SIZEOF_MINIMAL_ENUM", \ > flag_short_enums ? 1 : 4);\ > - builtin_define_with_int_value ( \ > - "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ > + builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ > + wchar_type_node); \ > if (TARGET_ARM_ARCH_PROFILE)\ > builtin_define_with_int_value ( \ > "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ > OK.
[ARM]Make CLZ_DEFINED_VALUE_AT_ZERO and CTZ_DEFINED_VALUE_AT_ZERO return 2.
Hi all, This patch update CLZ_DEFINED_VALUE_AT_ZERO and CTZ_DEFINED_VALUE_AT_ZERO to make them return 2 in arm back-end. Here are the explanations from GCC documentation: CLZ_DEFINED_VALUE_AT_ZERO (mode, value) CTZ_DEFINED_VALUE_AT_ZERO (mode, value) A C expression that indicates whether the architecture defines a value for @code{clz} or @code{ctz} with a zero operand. A result of 0 indicates the value is undefined. If the value is defined for only the RTL expression, the macro should evaluate to 1; if the value applies also to the corresponding optab entry (which is normally the case if it expands directly into the corresponding RTL), then the macro should evaluate to 2. In the cases where the value is defined, @var{value} should be set to this value. arm-none-eabi has been test on the model, no new issue. Okay for trunk? gcc/ChangeLog: 2015-01-13 Renlin Li * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO): Return 2. (CTZ_DEFINED_VALUE_AT_ZERO): Ditto. diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index d850982..83c9c33 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2145,9 +2145,9 @@ extern int making_const_table; : reverse_condition (code)) #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) #define CC_STATUS_INIT \ do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0)
Re: [ARM]Make CLZ_DEFINED_VALUE_AT_ZERO and CTZ_DEFINED_VALUE_AT_ZERO return 2.
On 13/01/15 15:58, Renlin Li wrote: > Hi all, > > This patch update CLZ_DEFINED_VALUE_AT_ZERO and > CTZ_DEFINED_VALUE_AT_ZERO to make them return 2 in > arm back-end. > > Here are the explanations from GCC documentation: > > CLZ_DEFINED_VALUE_AT_ZERO (mode, value) > CTZ_DEFINED_VALUE_AT_ZERO (mode, value) > A C expression that indicates whether the architecture defines a value > for @code{clz} or @code{ctz} with a zero operand. > A result of 0 indicates the value is undefined. > If the value is defined for only the RTL expression, the macro should > evaluate to 1; if the value applies also to the corresponding optab > entry (which is normally the case if it expands directly into > the corresponding RTL), then the macro should evaluate to 2. > In the cases where the value is defined, @var{value} should be set to > this value. > > arm-none-eabi has been test on the model, no new issue. > Okay for trunk? > > gcc/ChangeLog: > > 2015-01-13 Renlin Li > > * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO): Return 2. > (CTZ_DEFINED_VALUE_AT_ZERO): Ditto. > > OK. R.
[PATCH] Fix REE for vector modes (PR rtl-optimization/64286, take 2)
On Mon, Jan 12, 2015 at 02:29:53PM -0700, Jeff Law wrote: > On 01/12/15 12:59, Jakub Jelinek wrote: > >Hi! > > > >As mentioned in the PR, giving up for all vector mode extensions > >is unnecessary, but unlike scalar integer extensions, where the low part > >of the extended value is the original value, for vectors this is not true, > >thus the old value is lost. Which means we can perform REE, but only if > >all uses of the definition are the same (code+mode) extension. > > > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > > >2015-01-12 Jakub Jelinek > > > > PR rtl-optimization/64286 > > * ree.c (add_removable_extension): Don't add vector mode > > extensions if all uses of the source register aren't the same > > vector extensions. > > > > * gcc.target/i386/avx2-pr64286.c: New test. > Does it make sense to remove your change for 59754 in combine_reaching_defs? > Shouldn't this patch handle that case as well? You're right, this patch handles that too. New patch, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-01-13 Jakub Jelinek PR rtl-optimization/64286 * ree.c (combine_reaching_defs): Move part of comment earlier, remove !SCALAR_INT_MODE_P check. (add_removable_extension): Don't add vector mode extensions if all uses of the source register aren't the same vector extensions. * gcc.target/i386/avx2-pr64286.c: New test. --- gcc/ree.c.jj2015-01-12 21:29:07.023060045 +0100 +++ gcc/ree.c 2015-01-13 09:43:32.158449885 +0100 @@ -783,6 +783,17 @@ combine_reaching_defs (ext_cand *cand, c != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn); if (copy_needed) { + /* Considering transformation of +(set (reg1) (expression)) +... +(set (reg2) (any_extend (reg1))) + +into + +(set (reg2) (any_extend (expression))) +(set (reg1) (reg2)) +... */ + /* In theory we could handle more than one reaching def, it just makes the code to update the insn stream more complex. 
*/ if (state->defs_list.length () != 1) @@ -798,18 +809,6 @@ combine_reaching_defs (ext_cand *cand, c if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE) return false; - /* Transformation of -(set (reg1) (expression)) -(set (reg2) (any_extend (reg1))) -into -(set (reg2) (any_extend (expression))) -(set (reg1) (reg2)) -is only valid for scalar integral modes, as it relies on the low -subreg of reg1 to have the value of (expression), which is not true -e.g. for vector modes. */ - if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn) - return false; - machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn))); rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn))); @@ -1027,6 +1026,7 @@ add_removable_extension (const_rtx expr, different extension. FIXME: this obviously can be improved. */ for (def = defs; def; def = def->next) if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))]) + && idx != -1U && (cand = &(*insn_list)[idx - 1]) && cand->code != code) { @@ -1038,6 +1038,57 @@ add_removable_extension (const_rtx expr, } return; } + /* For vector mode extensions, ensure that all uses of the + XEXP (src, 0) register are the same extension (both code + and to which mode), as unlike integral extensions lowpart + subreg of the sign/zero extended register are not equal + to the original register, so we have to change all uses or + none. 
*/ + else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0 + { + if (idx == 0) + { + struct df_link *ref_chain, *ref_link; + + ref_chain = DF_REF_CHAIN (def->ref); + for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) + { + if (ref_link->ref == NULL + || DF_REF_INSN_INFO (ref_link->ref) == NULL) + { + idx = -1U; + break; + } + rtx_insn *use_insn = DF_REF_INSN (ref_link->ref); + const_rtx use_set; + if (use_insn == insn || DEBUG_INSN_P (use_insn)) + continue; + if (!(use_set = single_set (use_insn)) + || !REG_P (SET_DEST (use_set)) + || GET_MODE (SET_DEST (use_set)) != GET_MODE (dest) + || GET_CODE (SET_SRC (use_set)) != code + || !rtx_equal_p (XEXP (SET_SRC (use_set), 0), +XEXP (src, 0))) + { +
[PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
Hi! My PR60663 fix unfortunately stopped CSE of all inline-asms, even when they e.g. only have the clobbers added by default. This patch attempts to restore the old behavior, with the exceptions: 1) as always, asm volatile is not CSEd 2) inline-asm with multiple outputs are not CSEd 3) on request from Richard (which Segher on IRC argues against), "memory" clobber also prevents CSE; this can be removed by removing the int j, lim = XVECLEN (x, 0); and loop below it 4) inline-asm with clobbers is never copied into an insn that wasn't inline-asm before, so if there are clobbers, we allow CSEing of e.g. two same inline-asms, but only by reusing results of one of those Bootstrapped/regtested on x86_64-linux and i686-linux, tested also with arm cross after reverting the PR60663 arm cost fix. Ok for trunk this way, or with 3) removed? 2015-01-13 Jakub Jelinek PR rtl-optimization/63637 PR rtl-optimization/60663 * cse.c (merge_equiv_classes): Set new_elt->cost to MAX_COST if elt->cost is MAX_COST for ASM_OPERANDS. (find_sets_in_insn): Fix up comment typo. (cse_insn): Don't set src_volatile for all non-volatile ASM_OPERANDS in PARALLELs, but just those with multiple outputs or with "memory" clobber. Set elt->cost to MAX_COST for ASM_OPERANDS in PARALLEL. Set src_elt->cost to MAX_COST if new_src is ASM_OPERANDS and elt->cost is MAX_COST. * gcc.dg/pr63637-1.c: New test. * gcc.dg/pr63637-2.c: New test. * gcc.dg/pr63637-3.c: New test. * gcc.dg/pr63637-4.c: New test. * gcc.dg/pr63637-5.c: New test. * gcc.dg/pr63637-6.c: New test. * gcc.target/i386/pr63637-1.c: New test. * gcc.target/i386/pr63637-2.c: New test. * gcc.target/i386/pr63637-3.c: New test. * gcc.target/i386/pr63637-4.c: New test. * gcc.target/i386/pr63637-5.c: New test. * gcc.target/i386/pr63637-6.c: New test. 
--- gcc/cse.c.jj2015-01-09 21:59:44.0 +0100 +++ gcc/cse.c 2015-01-13 13:26:23.391216064 +0100 @@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c } new_elt = insert (exp, class1, hash, mode); new_elt->in_memory = hash_arg_in_memory; + if (GET_CODE (exp) == ASM_OPERANDS && elt->cost == MAX_COST) + new_elt->cost = MAX_COST; } } } @@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc { int i, lim = XVECLEN (x, 0); - /* Go over the epressions of the PARALLEL in forward order, to + /* Go over the expressions of the PARALLEL in forward order, to put them in the same order in the SETS array. */ for (i = 0; i < lim; i++) { @@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn) && REGNO (dest) >= FIRST_PSEUDO_REGISTER) sets[i].src_volatile = 1; - /* Also do not record result of a non-volatile inline asm with -more than one result or with clobbers, we do not want CSE to -break the inline asm apart. */ else if (GET_CODE (src) == ASM_OPERANDS && GET_CODE (x) == PARALLEL) - sets[i].src_volatile = 1; + { + /* Do not record result of a non-volatile inline asm with +more than one result. */ + if (n_sets > 1) + sets[i].src_volatile = 1; + + int j, lim = XVECLEN (x, 0); + for (j = 0; j < lim; j++) + { + rtx y = XVECEXP (x, 0, j); + /* And do not record result of a non-volatile inline asm +with "memory" clobber. */ + if (GET_CODE (y) == CLOBBER && MEM_P (XEXP (y, 0))) + { + sets[i].src_volatile = 1; + break; + } + } + } #if 0 /* It is no longer clear why we used to do this, but it doesn't @@ -5230,8 +5247,8 @@ cse_insn (rtx_insn *insn) ; /* Look for a substitution that makes a valid insn. 
*/ - else if (validate_unshare_change -(insn, &SET_SRC (sets[i].rtl), trial, 0)) + else if (validate_unshare_change (insn, &SET_SRC (sets[i].rtl), + trial, 0)) { rtx new_rtx = canon_reg (SET_SRC (sets[i].rtl), insn); @@ -5593,6 +5610,12 @@ cse_insn (rtx_insn *insn) } elt = insert (src, classp, sets[i].src_hash, mode); elt->in_memory = sets[i].src_in_memory; + /* If inline asm has any clobbers, ensure we only reuse + existing inline asms and never try to put the ASM_OPERANDS + into an insn that isn't inline asm. */ + if (GET_CODE (src) == ASM_OPERANDS + && GET_CODE (x) == PARALLEL) + elt->cost = MAX_COST; sets[i].src_elt = classp = elt; } if (sets[i].src_cons
Re: Open Issues in the TSAN Runtime
On Mon, Jan 12, 2015 at 09:55:15PM +0100, Jakub Jelinek wrote: > > specific changes from there. > > Yes, I'll try to cherry-pick those tomorrow. > > > I am especially interested in fixing these two issues, but there may be > > other important improvements too: > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64350 : TSAN fails after > > stress-testing for a while > > > > was fixed by > > > > http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_deadlock_detector.h?r1=224518&r2=224517&pathrev=224518 > > http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_deadlock_detector.h?r1=224519&r2=224518&pathrev=224519 > > > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63251 : tsan: corrupted shadow > > stack > > > > was fixed by > > > > http://llvm.org/viewvc/llvm-project?view=revision&revision=224702 > > http://llvm.org/viewvc/llvm-project?view=revision&revision=224834 So do you mean the following? I've bootstrapped/regtested on x86_64-linux and i686-linux, but haven't tried any special testcases on it. If it works for you, I'll commit it. 2015-01-13 Jakub Jelinek * sanitizer_common/sanitizer_deadlock_detector.h: Cherry pick upstream r224518 and r224519. * tsan/tsan_rtl_thread.cc: Cherry pick upstream r224702 and r224834. 
--- libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h.jj 2014-05-22 10:18:14.893626024 +0200 +++ libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h 2015-01-13 14:07:40.096414924 +0100 @@ -48,6 +48,8 @@ class DeadlockDetectorTLS { if (epoch_ == current_epoch) return; bv_.clear(); epoch_ = current_epoch; +n_recursive_locks = 0; +n_all_locks_ = 0; } uptr getEpoch() const { return epoch_; } @@ -81,7 +83,8 @@ class DeadlockDetectorTLS { } } // Printf("remLock: %zx %zx\n", lock_id, epoch_); -CHECK(bv_.clearBit(lock_id)); +if (!bv_.clearBit(lock_id)) + return; // probably addLock happened before flush if (n_all_locks_) { for (sptr i = n_all_locks_ - 1; i >= 0; i--) { if (all_locks_with_contexts_[i].lock == static_cast(lock_id)) { @@ -173,6 +176,7 @@ class DeadlockDetector { recycled_nodes_.clear(); available_nodes_.setAll(); g_.clear(); +n_edges_ = 0; return getAvailableNode(data); } --- libsanitizer/tsan/tsan_rtl_thread.cc.jj 2014-09-24 11:08:03.824028080 +0200 +++ libsanitizer/tsan/tsan_rtl_thread.cc2015-01-13 14:08:06.167954292 +0100 @@ -109,12 +109,13 @@ void ThreadContext::OnStarted(void *arg) thr->dd_pt = ctx->dd->CreatePhysicalThread(); thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id); } + thr->fast_state.SetHistorySize(flags()->history_size); + // Commit switch to the new part of the trace. + // TraceAddEvent will reset stack0/mset0 in the new part for us. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + thr->fast_synch_epoch = epoch0; AcquireImpl(thr, 0, &sync); - thr->fast_state.SetHistorySize(flags()->history_size); - const uptr trace = (epoch0 / kTracePartSize) % TraceParts(); - Trace *thr_trace = ThreadTrace(thr->tid); - thr_trace->headers[trace].epoch0 = epoch0; StatInc(thr, StatSyncAcquire); sync.Reset(&thr->clock_cache); DPrintf("#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx " Jakub
Re: [PATCH] IPA ICF: add comparison for target and optimization nodes
> Hello. > > Following patch adds support for target and optimization nodes comparison, > which is > based on Honza's newly added infrastructure. > > Apart from that, there's a small hunk that corrects formatting and removes > unnecessary > call to a comparison function. > > Hope it can be applied as one patch. > > Tested on x86_64-linux-pc without any new regression introduction. > > Ready for trunk? OK, thanks! Honza > > Thank you, > Martin > >From 393eaa47c8aef9a91a1c635016f23ca2f5aa25e4 Mon Sep 17 00:00:00 2001 > From: mliska > Date: Tue, 6 Jan 2015 15:06:18 +0100 > Subject: [PATCH] IPA ICF: target and optimization flags comparison. > > gcc/ChangeLog: > > 2015-01-06 Martin Liska > > * cgraphunit.c (cgraph_node::create_wrapper): Fix level of indentation. > * ipa-icf.c (sem_function::equals_private): Add support for target and > (sem_item_optimizer::merge_classes): Remove redundant function > comparison. > optimization flags comparison. > * tree.h (target_opts_for_fn): New function. > --- > gcc/cgraphunit.c | 52 ++-- > gcc/ipa-icf.c| 44 +++- > gcc/tree.h | 10 ++ > 3 files changed, 79 insertions(+), 27 deletions(-) > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > index c8c8562..81246e2 100644 > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -2385,40 +2385,40 @@ cgraphunit_c_finalize (void) > void > cgraph_node::create_wrapper (cgraph_node *target) > { > -/* Preserve DECL_RESULT so we get right by reference flag. */ > -tree decl_result = DECL_RESULT (decl); > + /* Preserve DECL_RESULT so we get right by reference flag. */ > + tree decl_result = DECL_RESULT (decl); > > -/* Remove the function's body but keep arguments to be reused > - for thunk. */ > -release_body (true); > -reset (); > + /* Remove the function's body but keep arguments to be reused > + for thunk. 
*/ > + release_body (true); > + reset (); > > -DECL_RESULT (decl) = decl_result; > -DECL_INITIAL (decl) = NULL; > -allocate_struct_function (decl, false); > -set_cfun (NULL); > + DECL_RESULT (decl) = decl_result; > + DECL_INITIAL (decl) = NULL; > + allocate_struct_function (decl, false); > + set_cfun (NULL); > > -/* Turn alias into thunk and expand it into GIMPLE representation. */ > -definition = true; > -thunk.thunk_p = true; > -thunk.this_adjusting = false; > + /* Turn alias into thunk and expand it into GIMPLE representation. */ > + definition = true; > + thunk.thunk_p = true; > + thunk.this_adjusting = false; > > -cgraph_edge *e = create_edge (target, NULL, 0, CGRAPH_FREQ_BASE); > + cgraph_edge *e = create_edge (target, NULL, 0, CGRAPH_FREQ_BASE); > > -tree arguments = DECL_ARGUMENTS (decl); > + tree arguments = DECL_ARGUMENTS (decl); > > -while (arguments) > - { > - TREE_ADDRESSABLE (arguments) = false; > - arguments = TREE_CHAIN (arguments); > - } > + while (arguments) > +{ > + TREE_ADDRESSABLE (arguments) = false; > + arguments = TREE_CHAIN (arguments); > +} > > -expand_thunk (false, true); > -e->call_stmt_cannot_inline_p = true; > + expand_thunk (false, true); > + e->call_stmt_cannot_inline_p = true; > > -/* Inline summary set-up. */ > -analyze (); > -inline_analyze_function (this); > + /* Inline summary set-up. */ > + analyze (); > + inline_analyze_function (this); > } > > #include "gt-cgraphunit.h" > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c > index c7ba75a..28158b3 100644 > --- a/gcc/ipa-icf.c > +++ b/gcc/ipa-icf.c > @@ -427,6 +427,49 @@ sem_function::equals_private (sem_item *item, >if (!equals_wpa (item, ignored_nodes)) > return false; > > + /* Checking function TARGET and OPTIMIZATION flags. 
*/ > + cl_target_option *tar1 = target_opts_for_fn (decl); > + cl_target_option *tar2 = target_opts_for_fn (m_compared_func->decl); > + > + if (tar1 != NULL || tar2 != NULL) > +{ > + if (!cl_target_option_eq (tar1, tar2)) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Source target flags\n"); > + cl_target_option_print (dump_file, 2, tar1); > + fprintf (dump_file, "Target target flags\n"); > + cl_target_option_print (dump_file, 2, tar2); > + } > + > + return return_false_with_msg ("Target flags are different"); > + } > +} > + else if (tar1 != NULL || tar2 != NULL) > +return return_false_with_msg ("Target flags are different"); > + > + cl_optimization *opt1 = opts_for_fn (decl); > + cl_optimization *opt2 = opts_for_fn (m_compared_func->decl); > + > + if (opt1 != NULL && opt2 != NULL) > +{ > + if (memcmp (opt1, opt2, sizeof(cl_optimization))) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fpr
[PATCH] Don't set TREE_READONLY on dummy args with VALUE attr (PR fortran/64528)
Hi! With VALUE attr, the PARM_DECLs hold the values and thus are (usually) not read-only, therefore telling the middle-end they are read-only leads to invalid IL. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-01-13 Jakub Jelinek PR fortran/64528 * trans-decl.c (create_function_arglist): Don't set TREE_READONLY on dummy args with VALUE attribute. * gfortran.dg/pr64528.f90: New test. --- gcc/fortran/trans-decl.c.jj 2015-01-09 21:59:47.0 +0100 +++ gcc/fortran/trans-decl.c2015-01-13 14:24:22.342682352 +0100 @@ -2327,8 +2327,9 @@ create_function_arglist (gfc_symbol * sy /* Fill in arg stuff. */ DECL_CONTEXT (parm) = fndecl; DECL_ARG_TYPE (parm) = TREE_VALUE (typelist); - /* All implementation args are read-only. */ - TREE_READONLY (parm) = 1; + /* All implementation args except for VALUE are read-only. */ + if (!f->sym->attr.value) + TREE_READONLY (parm) = 1; if (POINTER_TYPE_P (type) && (!f->sym->attr.proc_pointer && f->sym->attr.flavor != FL_PROCEDURE)) --- gcc/testsuite/gfortran.dg/pr64528.f90.jj2015-01-13 14:27:13.475650977 +0100 +++ gcc/testsuite/gfortran.dg/pr64528.f90 2015-01-13 14:26:46.0 +0100 @@ -0,0 +1,20 @@ +! PR fortran/64528 +! { dg-do compile } +! { dg-options "-O -fno-tree-dce -fno-tree-ccp" } + +program pr64528 + interface + subroutine foo(x) + integer, value :: x + end subroutine foo + end interface + integer :: x + x = 10 + call foo(x) + if(x .ne. 10) then + endif +end program pr64528 +subroutine foo(x) + integer, value :: x + x = 11 +end subroutine foo Jakub
[PATCH] Fix ICE with -fgnu-tm and pragma ivdep (PR middle-end/64391)
We ICE on this testcase, because the usage of #pragma GCC ivdep pulls in the ANNOTATE internal functions which don't have underlying fndecls, hence we segv on a NULL_TREE. This patch makes get_attrs_for be prepared for such a scenario. The callers of get_attrs_for already check for NULL_TREE. I don't think internal fns can have transaction_* attributes anyway. While at it, I did some cleanups. Bootstrapped/regtested on {ppc64,x86_64}-linux, ok for trunk? 2015-01-13 Marek Polacek PR middle-end/64391 * trans-mem.c (get_attrs_for): Return NULL_TREE if X is NULL_TREE. * gcc.dg/tm/pr64391.c: New test. diff --git gcc/testsuite/gcc.dg/tm/pr64391.c gcc/testsuite/gcc.dg/tm/pr64391.c index e69de29..235118a 100644 --- gcc/testsuite/gcc.dg/tm/pr64391.c +++ gcc/testsuite/gcc.dg/tm/pr64391.c @@ -0,0 +1,10 @@ +/* PR middle-end/64391 */ +/* { dg-do compile } */ +/* { dg-options "-fgnu-tm" } */ + +void +foo (void) +{ +#pragma GCC ivdep + while (1); +} diff --git gcc/trans-mem.c gcc/trans-mem.c index b449760..21fa497 100644 --- gcc/trans-mem.c +++ gcc/trans-mem.c @@ -183,6 +183,9 @@ static void *expand_regions (struct tm_region *, static tree get_attrs_for (const_tree x) { + if (x == NULL_TREE) +return NULL_TREE; + switch (TREE_CODE (x)) { case FUNCTION_DECL: @@ -191,16 +194,16 @@ get_attrs_for (const_tree x) default: if (TYPE_P (x)) - return NULL; + return NULL_TREE; x = TREE_TYPE (x); if (TREE_CODE (x) != POINTER_TYPE) - return NULL; + return NULL_TREE; /* FALLTHRU */ case POINTER_TYPE: x = TREE_TYPE (x); if (TREE_CODE (x) != FUNCTION_TYPE && TREE_CODE (x) != METHOD_TYPE) - return NULL; + return NULL_TREE; /* FALLTHRU */ case FUNCTION_TYPE: Marek
Re: [PATCH] Don't set TREE_READONLY on dummy args with VALUE attr (PR fortran/64528)
Jakub Jelinek wrote: > With VALUE attr, the PARM_DECLs hold the values and thus are (usually) not > read-only, therefore telling the middle-end they are read-only leads to > invalid IL. > > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for > trunk? OK. Thanks for the patch. I haven't checked whether it also applies to 4.8/4.9; if so, the patch is also okay for those branches. Tobias
Re: flatten expr.h (version 2)
On 13 January 2015 at 16:06, Prathamesh Kulkarni wrote: > On 13 January 2015 at 15:34, Richard Biener wrote: >> On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: >> >>> Hi, >>> This is a revamped expr.h flattening flattening patch rebased on >>> tree.h and tree-core.h flattening patch (r219402). >>> It depends upon the following patch to get committed. >>> https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html >>> >>> Changes: >>> * Removed all includes except tree-core.h. Put includes required by >>> expr.h in a comment. >>> * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. >>> * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, >>> genoutput.c. >>> * Did not put includes in gcc-plugin.h since expr.h cannot be included >>> by plugins >>> (putting them broke building a file in c-family/ since expr.h is not >>> allowed in front-ends) >>> * Affects java front-end (expr.h is allowed in java front-end). >>> >>> Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: >>> all,go,ada,jit >>> Built on all targets in config-list.mk with languages: all, go. >>> OK to commit ? >> >> diff --git a/gcc/expr.c b/gcc/expr.c >> index fc22862..824541e 100644 >> --- a/gcc/expr.c >> +++ b/gcc/expr.c >> @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see >> #include "regs.h" >> #include "hard-reg-set.h" >> #include "except.h" >> -#include "input.h" >> #include "function.h" >> #include "insn-config.h" >> #include "insn-attr.h" >> /* Include expr.h after insn-config.h so we get HAVE_conditional_move. >> */ >> +#include "hashtab.h" >> +#include "emit-rtl.h" >> +#include "expmed.h" >> +#include "stmt.h" >> +#include "statistics.h" >> +#include "real.h" >> +#include "fixed-value.h" >> #include "expr.h" >> >> Please move the comment to the proper place > ah, my flattening tool doesn't look at comments. I will move the > comment before expr.h include, thanks. 
>> >> diff --git a/gcc/expr.h b/gcc/expr.h >> index a7638b8..f1be8dc 100644 >> --- a/gcc/expr.h >> +++ b/gcc/expr.h >> @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. If not see >> #ifndef GCC_EXPR_H >> #define GCC_EXPR_H >> >> -/* For inhibit_defer_pop */ >> +/* expr.h required includes */ >> +#if 0 >> #include "hashtab.h" >> #include "hash-set.h" >> #include "vec.h" >> @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. If not see >> #include "hard-reg-set.h" >> #include "input.h" >> #include "function.h" >> -/* For XEXP, GEN_INT, rtx_code */ >> #include "rtl.h" >> -/* For optimize_size */ >> #include "flags.h" >> -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, >> - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ >> #include "tree-core.h" >> -/* For GET_MODE_BITSIZE, word_mode */ >> #include "insn-config.h" >> +#include "alias.h" >> +#include "emit-rtl.h" >> +#include "expmed.h" >> +#include "stmt.h" >> +#endif >> >> Err, please remove the #if 0 section > I kept it because if something breaks later (hopefully not!), it will > be easier to fix. > I will remove it. >> >> + >> +#include "tree-core.h" >> >> Why? The original comment says >> >> -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, >> - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ >> >> but all those are declared in tree.h. Which means the files including >> expr.h must already include tree.h. >> >> If that's not the reason we need to include tree-core.h from expr.c >> please add a comment explaining why. > bt-load.c fails to compile because it includes expr.h but does not > include tree.h > I will place tree.h include in all files that include expr.h and rebuild. This is not going to work, since tree.h is now flattened. Shall also require including all headers required by tree.h in all files that include expr.h. Could we retain tree-core.h in expr.h for now ? 
Or should I insert tree.h (along with tree.h required includes) in all files that include expr.h ? Thanks, Prathamesh >> >> -/* Definitions from emit-rtl.c */ >> -#include "emit-rtl.h" >> - >> /* Return a memory reference like MEMREF, but with its mode widened to >> MODE and adjusted by OFFSET. */ >> extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); >> >> err - functions defined in emit-rtl.c should be declared in emit-rtl.h. >> Please fix that first. expr.h should _only_ contain prototypes >> for stuff defined in expr.c. > oops, missed it :( >> >> Andrew did a good job with this, first cleaning up a header moving >> declarations to proper places and only after that flattening it. >> >> The rest of the patch looks good to me but expr.h isn't in a good >> shape after it. > I will work on it and send patch with suggested changes by tomorrow. > > Thanks, > Prathamesh >> >> Thanks, >> Richard.