Eric Botcazou <[email protected]> writes:
>> * reload.c (push_reload): Check contains_reg_of_mode.
>> * reload1.c (strip_paradoxical_subreg): New function.
>> (gen_reload_chain_without_interm_reg_p): Use it to handle
>> paradoxical subregs.
>> (emit_output_reload_insns, gen_reload): Likewise.
>
> Testing (not a full cycle, but still) revealed no problems on SPARC
> (32-bit and 64-bit) or IA-64. The patch is OK as far as I'm
> concerned, but you might want to get a second opinion from a reload
> expert.
Thanks. Rather than hand-picking an expert, I compromised and waited for
a couple of days to see if anyone had any comments or objections.
> Now I have a couple of requests:
>
> 1. Could you add PR rtl-optimization/48830 to the ChangeLog and install the
> testcase (attached) distilled by Hans-Peter as gcc.target/sparc/ultrasp12.c?
OK, done.
> 2. Could you rename the first parameter of the new function?
> "focus" sounds a little strange to me and is unheard of in the GCC
> codebase. Maybe "target"?
I went for "op". "target" might have been misleading because the
parameter is sometimes the source (rather than destination) of the
reload.
Here's what I installed after retesting on x86_64-linux-gnu.
Thanks again for the review.
Richard
gcc/
PR rtl-optimization/48830
PR rtl-optimization/48808
PR rtl-optimization/48792
* reload.c (push_reload): Check contains_reg_of_mode.
* reload1.c (strip_paradoxical_subreg): New function.
(gen_reload_chain_without_interm_reg_p): Use it to handle
paradoxical subregs.
(emit_output_reload_insns, gen_reload): Likewise.
gcc/testsuite/
2011-06-01 Eric Botcazou <[email protected]>
Hans-Peter Nilsson <[email protected]>
PR rtl-optimization/48830
* gcc.target/sparc/ultrasp12.c: New test.
Index: gcc/reload.c
===================================================================
--- gcc/reload.c 2011-05-30 17:26:36.000000000 +0100
+++ gcc/reload.c 2011-06-01 18:45:48.000000000 +0100
@@ -1019,6 +1019,7 @@ push_reload (rtx in, rtx out, rtx *inloc
#ifdef CANNOT_CHANGE_MODE_CLASS
&& !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass)
#endif
+ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))]
&& (CONSTANT_P (SUBREG_REG (in))
|| GET_CODE (SUBREG_REG (in)) == PLUS
|| strict_low
@@ -1125,6 +1126,7 @@ push_reload (rtx in, rtx out, rtx *inloc
#ifdef CANNOT_CHANGE_MODE_CLASS
&& !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode,
rclass)
#endif
+ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))]
&& (CONSTANT_P (SUBREG_REG (out))
|| strict_low
|| (((REG_P (SUBREG_REG (out))
Index: gcc/reload1.c
===================================================================
--- gcc/reload1.c 2011-05-30 17:26:36.000000000 +0100
+++ gcc/reload1.c 2011-06-01 18:50:01.000000000 +0100
@@ -4471,6 +4471,43 @@ scan_paradoxical_subregs (rtx x)
}
}
}
+
+/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload.
+ If *OP_PTR is a paradoxical subreg, try to remove that subreg
+ and apply the corresponding narrowing subreg to *OTHER_PTR.
+ Return true if the operands were changed, false otherwise. */
+
+static bool
+strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr)
+{
+ rtx op, inner, other, tem;
+
+ op = *op_ptr;
+ if (GET_CODE (op) != SUBREG)
+ return false;
+
+ inner = SUBREG_REG (op);
+ if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner)))
+ return false;
+
+ other = *other_ptr;
+ tem = gen_lowpart_common (GET_MODE (inner), other);
+ if (!tem)
+ return false;
+
+ /* If the lowpart operation turned a hard register into a subreg,
+ rather than simplifying it to another hard register, then the
+ mode change cannot be properly represented. For example, OTHER
+ might be valid in its current mode, but not in the new one. */
+ if (GET_CODE (tem) == SUBREG
+ && REG_P (other)
+ && HARD_REGISTER_P (other))
+ return false;
+
+ *op_ptr = inner;
+ *other_ptr = tem;
+ return true;
+}
/* A subroutine of reload_as_needed. If INSN has a REG_EH_REGION note,
examine all of the reload insns between PREV and NEXT exclusive, and
@@ -5538,7 +5575,7 @@ gen_reload_chain_without_interm_reg_p (i
chain reloads or do need an intermediate hard registers. */
bool result = true;
int regno, n, code;
- rtx out, in, tem, insn;
+ rtx out, in, insn;
rtx last = get_last_insn ();
/* Make r2 a component of r1. */
@@ -5557,11 +5594,7 @@ gen_reload_chain_without_interm_reg_p (i
/* If IN is a paradoxical SUBREG, remove it and try to put the
opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */
- if (GET_CODE (in) == SUBREG
- && (GET_MODE_SIZE (GET_MODE (in))
- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in))))
- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0)
- in = SUBREG_REG (in), out = tem;
+ strip_paradoxical_subreg (&in, &out);
if (GET_CODE (in) == PLUS
&& (REG_P (XEXP (in, 0))
@@ -7557,7 +7590,6 @@ emit_output_reload_insns (struct insn_ch
if (tertiary_icode != CODE_FOR_nothing)
{
rtx third_reloadreg = rld[tertiary_reload].reg_rtx;
- rtx tem;
/* Copy primary reload reg to secondary reload reg.
(Note that these have been swapped above, then
@@ -7566,13 +7598,7 @@ emit_output_reload_insns (struct insn_ch
/* If REAL_OLD is a paradoxical SUBREG, remove it
and try to put the opposite SUBREG on
RELOADREG. */
- if (GET_CODE (real_old) == SUBREG
- && (GET_MODE_SIZE (GET_MODE (real_old))
- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old))))
- && 0 != (tem = gen_lowpart_common
- (GET_MODE (SUBREG_REG (real_old)),
- reloadreg)))
- real_old = SUBREG_REG (real_old), reloadreg = tem;
+ strip_paradoxical_subreg (&real_old, &reloadreg);
gen_reload (reloadreg, second_reloadreg,
rl->opnum, rl->when_needed);
@@ -8388,16 +8414,8 @@ gen_reload (rtx out, rtx in, int opnum,
/* If IN is a paradoxical SUBREG, remove it and try to put the
opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */
- if (GET_CODE (in) == SUBREG
- && (GET_MODE_SIZE (GET_MODE (in))
- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in))))
- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0)
- in = SUBREG_REG (in), out = tem;
- else if (GET_CODE (out) == SUBREG
- && (GET_MODE_SIZE (GET_MODE (out))
- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out))))
- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0)
- out = SUBREG_REG (out), in = tem;
+ if (!strip_paradoxical_subreg (&in, &out))
+ strip_paradoxical_subreg (&out, &in);
/* How to do this reload can get quite tricky. Normally, we are being
asked to reload a simple operand, such as a MEM, a constant, or a pseudo
Index: gcc/testsuite/gcc.target/sparc/ultrasp12.c
===================================================================
--- /dev/null 2011-06-01 19:41:05.354325326 +0100
+++ gcc/testsuite/gcc.target/sparc/ultrasp12.c 2011-06-01 18:52:09.000000000 +0100
@@ -0,0 +1,64 @@
+/* PR rtl-optimization/48830 */
+/* Testcase by Hans-Peter Nilsson <[email protected]> */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned long int uint64_t;
+typedef unsigned long int uintmax_t;
+typedef unsigned char rc_vec_t __attribute__((__vector_size__(8)));
+typedef short rc_svec_type_ __attribute__((__vector_size__(8)));
+typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4)));
+
+void
+rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim,
+ const uint8_t *__restrict src2, int src2_dim,
+ int len, int height, uintmax_t sum[5])
+{
+ uint32_t s1 = 0;
+ uint32_t s2 = 0;
+ uintmax_t s11 = 0;
+ uintmax_t s22 = 0;
+ uintmax_t s12 = 0;
+ int full = len / ((1024) < (1024) ? (1024) : (1024));
+ int rem = len % ((1024) < (1024) ? (1024) : (1024));
+ int rem1 = rem / 1;
+ int y;
+ unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int
rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_
__attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_
__attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__
((__unused__)) = 0;
+ for (y = 0; y < height; y++) {
+ rc_vec_t a1, a2, a11, a22, a12;
+ int i1 = (y)*(src1_dim);
+ int i2 = (y)*(src2_dim);
+ int x;
+ ((a1) = ((rc_vec_t) {0}));
+ ((a2) = ((rc_vec_t) {0}));
+ ((a11) = ((rc_vec_t) {0}));
+ ((a22) = ((rc_vec_t) {0}));
+ ((a12) = ((rc_vec_t) {0}));
+ for (x = 0; x < full; x++) {
+ int k;
+ for (k = 0; k < ((1024) < (1024) ? (1024) : (1024)) /
+ 1; k++)
+ {
+ do { rc_vec_t v1, v2; ((v1) = *(const
rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) =
(((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1,
((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i;
})(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i;
})(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v;
uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_
= (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t
maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255),
(255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_
s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_,
s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_;
rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_,
mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_;
rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union {
rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ =
(rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)};
((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void);
rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if
(!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) ||
!rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2)
!= rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_);
rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_;
if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm"
(rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_));
} else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR
%[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0"
(s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;#
dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0"
(s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union {
rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_;
(s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ =
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_);
s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ =
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct {
rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
(s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do {
typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; }
RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) =
hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ =
__builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ =
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ =
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ =
__builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ +
mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ +
mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_;
mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union
{ rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_;
(accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist
((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_,
((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_,
((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_,
((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_,
lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
{{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)),
((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}};
(accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on
%[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0"
(rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2);
rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_,
accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ =
(rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_,
s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_
s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_,
s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_
mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_,
mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_
zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t
msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80),
(0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void);
rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if
(!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) ||
!rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2)
!= rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_);
rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_;
if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm"
(rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_));
} else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR
%[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0"
(s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;#
dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0"
(s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union {
rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_;
(s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ =
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_);
s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ =
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct {
rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
(s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do {
typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; }
RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) =
hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ =
__builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ =
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ =
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ =
__builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ +
mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ +
mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_;
mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union
{ rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_;
(accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist
((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_,
((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_,
((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_,
((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_,
lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
{{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)),
((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}};
(accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on
%[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0"
(rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1);
rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_,
accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ =
(rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_,
s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_
s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_,
s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_
mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_,
mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_
zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t
msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80),
(0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void);
rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if
(!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) ||
!rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2)
!= rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_);
rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_;
if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm"
(rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_));
} else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR
%[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0"
(s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;#
dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0"
(s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union {
rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_;
(s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ =
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_);
s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ =
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct {
rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
(s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do {
typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; }
RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) =
hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ =
__builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ =
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ =
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ =
__builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ +
mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ +
mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_;
mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union
{ rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_;
(accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist
((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_,
((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_,
((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_,
((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_,
lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
{{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)),
((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}};
(accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on
%[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0"
(rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; }
while (0);
+
+ }
+ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t
v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t
i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ +
machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ +
machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ +
machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t)
{0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) =
((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) +=
t12; } while (0);
+ }
+ for (x = 0; x < rem1; x++) {
+ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1]));
((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v;
uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union {
rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union {
rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}),
(((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t
s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_,
s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ =
(rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_,
s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_
s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_,
s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_
mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_,
mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_
zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t
msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80),
(0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void);
rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if
(!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) ||
!rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2)
!= rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_);
rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_;
if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm"
(rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_));
} else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR
%[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0"
(s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;#
dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0"
(s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union {
rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_;
(s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ =
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_);
s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ =
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct {
rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
(s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do {
typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; }
RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) =
hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ =
__builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ =
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ =
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ =
__builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ +
mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ +
mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_;
mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union
{ rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_;
(accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist
((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_,
((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_,
((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_,
((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_,
lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
{{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)),
((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}};
(accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on
%[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0"
(rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2);
rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_,
accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ =
(rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_,
s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_
s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_,
s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_
mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_,
mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_
zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t
msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80),
(0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void);
rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if
(!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) ||
!rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2)
!= rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_);
rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_;
if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm"
(rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_));
} else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR
%[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0"
(s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;#
dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0"
(s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union {
rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_;
(s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ =
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_);
s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ =
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct {
rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
(s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do {
typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; }
RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) =
hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ =
__builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ =
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ =
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ =
__builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ +
mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ +
mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_;
mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union
{ rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_;
(accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist
((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_,
((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_,
((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_,
((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_,
lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
{{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)),
((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}};
(accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on
%[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0"
(rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1);
rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_,
accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ =
(rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_,
s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_
s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_,
s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_
mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_,
mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_
zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t
msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80),
(0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void);
rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if
(!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) ||
!rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2)
!= rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_);
rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_;
if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm"
(rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_));
} else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR
%[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0"
(s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;#
dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0"
(s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union {
rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_;
(s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ =
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_);
s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ =
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct {
rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
(s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do {
typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; }
RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) =
hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ =
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ =
__builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ =
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ =
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ =
__builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ +
mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ +
mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_;
mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union
{ rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_;
RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_;
(accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist
((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_,
((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_,
((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i;
})(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_,
((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_,
lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_)
{{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)),
((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}};
(accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on
%[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0"
(rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; }
while (0);
+ }
+ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v;
uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i;
})(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_,
maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; }
hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) =
hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ +
machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ +
machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ +
machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t)
{0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) =
((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) +=
t12; } while (0);
+
+ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v;
uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i;
})(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_,
maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; }
hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) =
hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ +
machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ +
machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t
machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_,
lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_)
= hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union {
rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ +
machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t)
{0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) =
((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) +=
t12; } while (0);
+ }
+ sum[0] = s1;
+ sum[1] = s2;
+ sum[2] = s11;
+ sum[3] = s22;
+ sum[4] = s12;
+ ;
+}