Package: htslib
Severify: important
Tags: patch

htslib is failing to build in Debian armhf and raspbian.

I reproduced the bug locally in a raspbian stretch environment and got the following backtrace.

root@odroidu2:/htslib-1.2.1# gdb --args test/sam test/ce.fa
GNU gdb (Raspbian 7.10-1) 7.10
Copyright (C) 2015 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "arm-linux-gnueabihf".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from test/sam...done.
(gdb) run
Starting program: /htslib-1.2.1/test/sam test/ce.fa
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1".

Program received signal SIGBUS, Bus error.
0x00025640 in bam_aux2f (
    s=0xa967e "\333\017I@XddiW\024\213\n\277\005@XZZHello, world!",
    s@entry=0xa967d "f\333\017I@XddiW\024\213\n\277\005@XZZHello, world!")
    at sam.c:1181
1181        else if (type == 'f') return *(float*)s;
(gdb)

This is an alignment issue. VFP requires floating point loads and stores to be naturally aligned but s is only aligned to 2 bytes whiile a float is 4 bytes. Looking further at the code reveals the reason for the unalignment. It seems that the code is reading from a data format where values are prefixed with single bytes practically gauranteeing that many of the accesses will be unaligned.

In portable code unaligned accesses should be avoided. Sometimes they will give the right result, sometimes they will give bus errors, sometimes they will silently give wrong answers. To a large extent the behaviour is driven by architecture (and sometimes version of the architecture) but it can also be driven by what instructions the compiler choses to use. Even if it does the right thing it may do so very slowly. x86 tends to be the most forgiving architecture when it comes to unaligned accesses.

The attatched patch replaces a number of unaligned accesses in sam.c with memcpy calls. It resulted in a successful build on raspbian stretch. I did not include any conditional logic but it would be easy to add conditional logic to only use memcpy on non-x86 targets if that was considered desirable (I do not if memcpy is faster or slower than letting the x86 cpu fixup the unaligned accesses in hardware and if-so whether the difference is likely to be significant).


diff -Nru htslib-1.2.1/debian/changelog htslib-1.2.1/debian/changelog
--- htslib-1.2.1/debian/changelog       2015-05-25 04:35:50.000000000 +0000
+++ htslib-1.2.1/debian/changelog       2015-09-29 19:03:13.000000000 +0000
@@ -1,3 +1,9 @@
+htslib (1.2.1-1+rpi1) stretch-staging; urgency=medium
+
+  * Fix bus errors in testsuite.
+
+ -- Peter Michael Green <plugw...@raspbian.org>  Tue, 29 Sep 2015 19:03:00 
+0000
+
 htslib (1.2.1-1) unstable; urgency=medium
 
   0662716 Merge tag '1.2.1' into debian/unstable
diff -Nru htslib-1.2.1/debian/patches/alignment.patch 
htslib-1.2.1/debian/patches/alignment.patch
--- htslib-1.2.1/debian/patches/alignment.patch 1970-01-01 00:00:00.000000000 
+0000
+++ htslib-1.2.1/debian/patches/alignment.patch 2015-09-29 19:20:45.000000000 
+0000
@@ -0,0 +1,119 @@
+Description: Fix alignment issues
+ Fix alignment issues identified through test failure with "BUS error" when
+ building on Debian armhf and raspbian.
+Author: Peter Michael Green <plugw...@raspbian.org>
+
+---
+The information above should follow the Patch Tagging Guidelines, please
+checkout http://dep.debian.net/deps/dep3/ to learn about the format. Here
+are templates for supplementary fields that you might want to add:
+
+Origin: <vendor|upstream|other>, <url of original patch>
+Bug: <url in upstream bugtracker>
+Bug-Debian: https://bugs.debian.org/<bugnumber>
+Bug-Ubuntu: https://launchpad.net/bugs/<bugnumber>
+Forwarded: <no|not-needed|url proving that it has been forwarded>
+Reviewed-By: <name and email of someone who approved the patch>
+Last-Update: <YYYY-MM-DD>
+
+Index: htslib-1.2.1/sam.c
+===================================================================
+--- htslib-1.2.1.orig/sam.c
++++ htslib-1.2.1/sam.c
+@@ -947,6 +947,12 @@ err_recover:
+     }
+ }
+ 
++#define READUNALIGNED(ptr,type) ({ \
++    type tmp;\
++    memcpy(tmp,ptr,sizeof(type));\
++    tmp;\
++})
++
+ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str)
+ {
+     int i;
+@@ -1007,36 +1013,36 @@ int sam_format1(const bam_hdr_t *h, cons
+         } else if (type == 'S') {
+             if (s+2 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputw(*(uint16_t*)s, str);
++                kputw(READUNALIGNED(s,uint16_t), str);
+                 s += 2;
+             } else return -1;
+         } else if (type == 's') {
+             if (s+2 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputw(*(int16_t*)s, str);
++                kputw(READUNALIGNED(s,int16_t), str);
+                 s += 2;
+             } else return -1;
+         } else if (type == 'I') {
+             if (s+4 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputuw(*(uint32_t*)s, str);
++                kputuw(READUNALIGNED(s,uint32_t), str);
+                 s += 4;
+             } else return -1;
+         } else if (type == 'i') {
+             if (s+4 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputw(*(int32_t*)s, str);
++                kputw(READUNALIGNED(s,int32_t), str);
+                 s += 4;
+             } else return -1;
+         } else if (type == 'f') {
+             if (s+4 <= b->data + b->l_data) {
+-                ksprintf(str, "f:%g", *(float*)s);
++                ksprintf(str, "f:%g", READUNALIGNED(s,float));
+                 s += 4;
+             } else return -1;
+ 
+         } else if (type == 'd') {
+             if (s+8 <= b->data + b->l_data) {
+-                ksprintf(str, "d:%g", *(double*)s);
++                ksprintf(str, "d:%g", READUNALIGNED(s,double));
+                 s += 8;
+             } else return -1;
+         } else if (type == 'Z' || type == 'H') {
+@@ -1057,11 +1063,11 @@ int sam_format1(const bam_hdr_t *h, cons
+                 kputc(',', str);
+                 if ('c' == sub_type)      { kputw(*(int8_t*)s, str); ++s; }
+                 else if ('C' == sub_type) { kputw(*(uint8_t*)s, str); ++s; }
+-                else if ('s' == sub_type) { kputw(*(int16_t*)s, str); s += 2; 
}
+-                else if ('S' == sub_type) { kputw(*(uint16_t*)s, str); s += 
2; }
+-                else if ('i' == sub_type) { kputw(*(int32_t*)s, str); s += 4; 
}
+-                else if ('I' == sub_type) { kputuw(*(uint32_t*)s, str); s += 
4; }
+-                else if ('f' == sub_type) { ksprintf(str, "%g", *(float*)s); 
s += 4; }
++                else if ('s' == sub_type) { kputw(READUNALIGNED(s,int16_t), 
str); s += 2; }
++                else if ('S' == sub_type) { kputw(READUNALIGNED(s,uint16_t), 
str); s += 2; }
++                else if ('i' == sub_type) { kputw(READUNALIGNED(s,int32_t), 
str); s += 4; }
++                else if ('I' == sub_type) { kputuw(READUNALIGNED(s,uint32_t), 
str); s += 4; }
++                else if ('f' == sub_type) { ksprintf(str, "%g", 
READUNALIGNED(s,float)); s += 4; }
+             }
+         }
+     }
+@@ -1167,9 +1173,9 @@ int32_t bam_aux2i(const uint8_t *s)
+     type = *s++;
+     if (type == 'c') return (int32_t)*(int8_t*)s;
+     else if (type == 'C') return (int32_t)*(uint8_t*)s;
+-    else if (type == 's') return (int32_t)*(int16_t*)s;
+-    else if (type == 'S') return (int32_t)*(uint16_t*)s;
+-    else if (type == 'i' || type == 'I') return *(int32_t*)s;
++    else if (type == 's') return (int32_t)READUNALIGNED(s,int16_t);
++    else if (type == 'S') return (int32_t)READUNALIGNED(s,uint16_t);
++    else if (type == 'i' || type == 'I') return READUNALIGNED(s,int32_t);
+     else return 0;
+ }
+ 
+@@ -1177,8 +1183,8 @@ double bam_aux2f(const uint8_t *s)
+ {
+     int type;
+     type = *s++;
+-    if (type == 'd') return *(double*)s;
+-    else if (type == 'f') return *(float*)s;
++    if (type == 'd') return READUNALIGNED(s,double);
++    else if (type == 'f') return READUNALIGNED(s,float);
+     else return 0.0;
+ }
+ 
diff -Nru htslib-1.2.1/debian/patches/debian-changes 
htslib-1.2.1/debian/patches/debian-changes
--- htslib-1.2.1/debian/patches/debian-changes  2015-05-25 04:42:16.000000000 
+0000
+++ htslib-1.2.1/debian/patches/debian-changes  2015-09-29 19:21:40.000000000 
+0000
@@ -70,3 +70,102 @@
 +```
 +
 +[download]: http://www.htslib.org/download/
+--- htslib-1.2.1.orig/sam.c
++++ htslib-1.2.1/sam.c
+@@ -947,6 +947,12 @@ err_recover:
+     }
+ }
+ 
++#define READUNALIGNED(ptr,type) ({ \
++    type tmp;\
++    memcpy(&tmp,ptr,sizeof(type));\
++    tmp;\
++})
++
+ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str)
+ {
+     int i;
+@@ -1007,36 +1013,36 @@ int sam_format1(const bam_hdr_t *h, cons
+         } else if (type == 'S') {
+             if (s+2 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputw(*(uint16_t*)s, str);
++                kputw(READUNALIGNED(s,uint16_t), str);
+                 s += 2;
+             } else return -1;
+         } else if (type == 's') {
+             if (s+2 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputw(*(int16_t*)s, str);
++                kputw(READUNALIGNED(s,int16_t), str);
+                 s += 2;
+             } else return -1;
+         } else if (type == 'I') {
+             if (s+4 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputuw(*(uint32_t*)s, str);
++                kputuw(READUNALIGNED(s,uint32_t), str);
+                 s += 4;
+             } else return -1;
+         } else if (type == 'i') {
+             if (s+4 <= b->data + b->l_data) {
+                 kputsn("i:", 2, str);
+-                kputw(*(int32_t*)s, str);
++                kputw(READUNALIGNED(s,int32_t), str);
+                 s += 4;
+             } else return -1;
+         } else if (type == 'f') {
+             if (s+4 <= b->data + b->l_data) {
+-                ksprintf(str, "f:%g", *(float*)s);
++                ksprintf(str, "f:%g", READUNALIGNED(s,float));
+                 s += 4;
+             } else return -1;
+ 
+         } else if (type == 'd') {
+             if (s+8 <= b->data + b->l_data) {
+-                ksprintf(str, "d:%g", *(double*)s);
++                ksprintf(str, "d:%g", READUNALIGNED(s,double));
+                 s += 8;
+             } else return -1;
+         } else if (type == 'Z' || type == 'H') {
+@@ -1057,11 +1063,11 @@ int sam_format1(const bam_hdr_t *h, cons
+                 kputc(',', str);
+                 if ('c' == sub_type)      { kputw(*(int8_t*)s, str); ++s; }
+                 else if ('C' == sub_type) { kputw(*(uint8_t*)s, str); ++s; }
+-                else if ('s' == sub_type) { kputw(*(int16_t*)s, str); s += 2; 
}
+-                else if ('S' == sub_type) { kputw(*(uint16_t*)s, str); s += 
2; }
+-                else if ('i' == sub_type) { kputw(*(int32_t*)s, str); s += 4; 
}
+-                else if ('I' == sub_type) { kputuw(*(uint32_t*)s, str); s += 
4; }
+-                else if ('f' == sub_type) { ksprintf(str, "%g", *(float*)s); 
s += 4; }
++                else if ('s' == sub_type) { kputw(READUNALIGNED(s,int16_t), 
str); s += 2; }
++                else if ('S' == sub_type) { kputw(READUNALIGNED(s,uint16_t), 
str); s += 2; }
++                else if ('i' == sub_type) { kputw(READUNALIGNED(s,int32_t), 
str); s += 4; }
++                else if ('I' == sub_type) { kputuw(READUNALIGNED(s,uint32_t), 
str); s += 4; }
++                else if ('f' == sub_type) { ksprintf(str, "%g", 
READUNALIGNED(s,float)); s += 4; }
+             }
+         }
+     }
+@@ -1167,9 +1173,9 @@ int32_t bam_aux2i(const uint8_t *s)
+     type = *s++;
+     if (type == 'c') return (int32_t)*(int8_t*)s;
+     else if (type == 'C') return (int32_t)*(uint8_t*)s;
+-    else if (type == 's') return (int32_t)*(int16_t*)s;
+-    else if (type == 'S') return (int32_t)*(uint16_t*)s;
+-    else if (type == 'i' || type == 'I') return *(int32_t*)s;
++    else if (type == 's') return (int32_t)READUNALIGNED(s,int16_t);
++    else if (type == 'S') return (int32_t)READUNALIGNED(s,uint16_t);
++    else if (type == 'i' || type == 'I') return READUNALIGNED(s,int32_t);
+     else return 0;
+ }
+ 
+@@ -1177,8 +1183,8 @@ double bam_aux2f(const uint8_t *s)
+ {
+     int type;
+     type = *s++;
+-    if (type == 'd') return *(double*)s;
+-    else if (type == 'f') return *(float*)s;
++    if (type == 'd') return READUNALIGNED(s,double);
++    else if (type == 'f') return READUNALIGNED(s,float);
+     else return 0.0;
+ }
+ 
diff -Nru htslib-1.2.1/debian/patches/series htslib-1.2.1/debian/patches/series
--- htslib-1.2.1/debian/patches/series  2015-05-25 04:03:55.000000000 +0000
+++ htslib-1.2.1/debian/patches/series  2015-09-29 18:51:28.000000000 +0000
@@ -1 +1,2 @@
 debian-changes
+alignment.patch

Reply via email to