I'm seeing the oddest thing with a function compiled like:
mpicc -std=gnu99 -O1 -g -m32 -pthread -msse -mno-sse2 -DHAVE_CONFIG_H
-I../../easel -I../../easel -I. -I.. -I. -I../../src -o fwdback.o -c
fwdback.c
using both gcc versions
gcc (GCC) 4.4.1 (on a 64 bit linux)
gcc (GCC) 4.2.3 (4.2.3-6mnb1) (on a 32 bit linux)
on OMPI 1.4.3.
The compilers are on Opterons, the worker node where it fails is
an Athlon MP. (Shouldn't be any difference with -mno-sse2 off, right?)
Basically it comes down to (many lines of code omitted)
register __m128 xEv;
fprintf(stderr,"DEBUG0 xEV %lld\n",xEv);fflush(stderr);
xEv = _mm_setzero_ps();
fprintf(stderr,"DEBUGB xEV %lld\n",xEv);fflush(stderr); /* problem */
throwing an error when run in Valgrind in a particular program at the
second printf.
==13053== Conditional jump or move depends on uninitialised value(s)
==13053== at 0x4BE50BC: vfprintf (in /lib/libc-2.10.1.so)
==13053== by 0x4BE9411: ??? (in /lib/libc-2.10.1.so)
==13053== by 0x4BE4492: vfprintf (in /lib/libc-2.10.1.so)
==13053== by 0x4BEE7CE: fprintf (in /lib/libc-2.10.1.so)
==13053== by 0x807FC19: forward_engine (fwdback.c:305) <--------
==13053== by 0x8080289: p7_ForwardParser (fwdback.c:143)
==13053== by 0x8075B08: p7_Tau (evalues.c:442)
==13053== by 0x8076554: p7_Calibrate (evalues.c:109)
==13053== by 0x8061815: calibrate (p7_builder.c:629)
==13053== by 0x80618D6: p7_SingleBuilder (p7_builder.c:393)
==13053== by 0x80570C9: main (jackhmmer.c:1068)
How can xEv possibly be uninitialized in that position? Note the
problem initially manifested much further down in the code here
_mm_store_ss(&xE, xEv);
As far as Valgrind is concerned it starts right after the _mm_setzero_ps().
After my signature is that function from the start down to the DEBUGB
line with all lines present - sorry about the wrap:
David Mathog
[email protected]
Manager, Sequence Analysis Facility, Biology Division, Caltech
-------------------------------------------------------------
static int
forward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE
*om, P7_OMX *ox, float *opt_sc)
{
register __m128 mpv, dpv, ipv; /* previous row values
*/
register __m128 sv; /* temp storage of 1 curr row value in
progress */
register __m128 dcv; /* delayed storage of D(i,q+1)
*/
register __m128 xEv; /* E state: keeps max for Mk->E as we go
*/
register __m128 xBv; /* B state: splatted vector of B[i-1] for
B->Mk calculations */
__m128 zerov; /* splatted 0.0's in a vector
*/
float xN, xE, xB, xC, xJ; /* special states' scores
*/
int i; /* counter over sequence positions 1..L
*/
int q; /* counter over quads 0..nq-1
*/
int j; /* counter over DD iterations (4 is full
serialization)
*/
int Q = p7O_NQF(om->M); /* segment length: # of vectors
*/
__m128 *dpc = ox->dpf[0]; /* current row, for use in
{MDI}MO(dpp,q) access macro */
__m128 *dpp; /* previous row, for use in
{MDI}MO(dpp,q) access macro */
__m128 *rp; /* will point at om->rfv[x] for residue x[i]
*/
__m128 *tp; /* will point into (and step thru) om->tfv
*/
/* Initialization. */
ox->M = om->M;
ox->L = L;
ox->has_own_scales = TRUE; /* all forward matrices control their own
scalefactors */
zerov = _mm_setzero_ps();
for (q = 0; q < Q; q++)
MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = zerov;
xE = ox->xmx[p7X_E] = 0.;
xN = ox->xmx[p7X_N] = 1.;
xJ = ox->xmx[p7X_J] = 0.;
xB = ox->xmx[p7X_B] = om->xf[p7O_N][p7O_MOVE];
xC = ox->xmx[p7X_C] = 0.;
ox->xmx[p7X_SCALE] = 1.0;
ox->totscale = 0.0;
#if p7_DEBUGGING
if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, 0, 9, 5, xE, xN, xJ,
xB, xC); /* logify=TRUE, <rowi>=0, width=8, precision=5*/
#endif
for (i = 1; i <= L; i++)
{
fprintf(stderr,"DEBUGA i %d\n",i);fflush(stderr);
dpp = dpc;
dpc = ox->dpf[do_full * i]; /* avoid conditional, use
do_full as kronecker delta */
rp = om->rfv[dsq[i]];
tp = om->tfv;
dcv = _mm_setzero_ps();
xEv = _mm_setzero_ps();
fprintf(stderr,"DEBUGB xEV %lld\n",xEv);fflush(stderr);