diff --git a/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c b/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
index 0a8bf2e..1fcba5c 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
@@ -5,60 +5,61 @@
 
 #include <stdint.h>
 
-unsigned char data[8];
+unsigned char data1[8] __attribute__ ((aligned (8)));
 
 struct uint64_st {
   unsigned char u0, u1, u2, u3, u4, u5, u6, u7;
-};
+} data2 __attribute__ ((aligned (8)));
 
 uint64_t read_le64_1 (void)
 {
-  return (uint64_t) data[0] | ((uint64_t) data[1] << 8)
-	 | ((uint64_t) data[2] << 16) | ((uint64_t) data[3] << 24)
-	 | ((uint64_t) data[4] << 32) | ((uint64_t) data[5] << 40)
-	 | ((uint64_t) data[6] << 48) | ((uint64_t) data[7] << 56);
+  return (uint64_t) data1[0] | ((uint64_t) data1[1] << 8)
+	 | ((uint64_t) data1[2] << 16) | ((uint64_t) data1[3] << 24)
+	 | ((uint64_t) data1[4] << 32) | ((uint64_t) data1[5] << 40)
+	 | ((uint64_t) data1[6] << 48) | ((uint64_t) data1[7] << 56);
 }
 
-uint64_t read_le64_2 (struct uint64_st data)
+uint64_t read_le64_2 (void)
 {
-  return (uint64_t) data.u0 | ((uint64_t) data.u1 << 8)
-	 | ((uint64_t) data.u2 << 16) | ((uint64_t) data.u3 << 24)
-	 | ((uint64_t) data.u4 << 32) | ((uint64_t) data.u5 << 40)
-	 | ((uint64_t) data.u6 << 48) | ((uint64_t) data.u7 << 56);
+  return (uint64_t) data2.u0 | ((uint64_t) data2.u1 << 8)
+	 | ((uint64_t) data2.u2 << 16) | ((uint64_t) data2.u3 << 24)
+	 | ((uint64_t) data2.u4 << 32) | ((uint64_t) data2.u5 << 40)
+	 | ((uint64_t) data2.u6 << 48) | ((uint64_t) data2.u7 << 56);
 }
 
-uint64_t read_le64_3 (unsigned char *data)
+uint64_t read_le64_3 (unsigned char *data3)
 {
-  return (uint64_t) *data | ((uint64_t) *(data + 1) << 8)
-	 | ((uint64_t) *(data + 2) << 16) | ((uint64_t) *(data + 3) << 24)
-	 | ((uint64_t) *(data + 4) << 32) | ((uint64_t) *(data + 5) << 40)
-	 | ((uint64_t) *(data + 6) << 48) | ((uint64_t) *(data + 7) << 56);
+  return (uint64_t) *data3 | ((uint64_t) *(data3 + 1) << 8)
+	 | ((uint64_t) *(data3 + 2) << 16) | ((uint64_t) *(data3 + 3) << 24)
+	 | ((uint64_t) *(data3 + 4) << 32) | ((uint64_t) *(data3 + 5) << 40)
+	 | ((uint64_t) *(data3 + 6) << 48) | ((uint64_t) *(data3 + 7) << 56);
 }
 
 uint64_t read_be64_1 (void)
 {
-  return (uint64_t) data[7] | ((uint64_t) data[6] << 8)
-	 | ((uint64_t) data[5] << 16) | ((uint64_t) data[4] << 24)
-	 | ((uint64_t) data[3] << 32) | ((uint64_t) data[2] << 40)
-	 | ((uint64_t) data[1] << 48) | ((uint64_t) data[0] << 56);
+  return (uint64_t) data1[7] | ((uint64_t) data1[6] << 8)
+	 | ((uint64_t) data1[5] << 16) | ((uint64_t) data1[4] << 24)
+	 | ((uint64_t) data1[3] << 32) | ((uint64_t) data1[2] << 40)
+	 | ((uint64_t) data1[1] << 48) | ((uint64_t) data1[0] << 56);
 }
 
-uint64_t read_be64_2 (struct uint64_st data)
+uint64_t read_be64_2 (void)
 {
-  return (uint64_t) data.u7 | ((uint64_t) data.u6 << 8)
-	 | ((uint64_t) data.u5 << 16) | ((uint64_t) data.u4 << 24)
-	 | ((uint64_t) data.u3 << 32) | ((uint64_t) data.u2 << 40)
-	 | ((uint64_t) data.u1 << 48) | ((uint64_t) data.u0 << 56);
+  return (uint64_t) data2.u7 | ((uint64_t) data2.u6 << 8)
+	 | ((uint64_t) data2.u5 << 16) | ((uint64_t) data2.u4 << 24)
+	 | ((uint64_t) data2.u3 << 32) | ((uint64_t) data2.u2 << 40)
+	 | ((uint64_t) data2.u1 << 48) | ((uint64_t) data2.u0 << 56);
 }
 
-uint64_t read_be64_3 (unsigned char *data)
+uint64_t read_be64_3 (unsigned char *data3)
 {
-  return (uint64_t) *(data + 7) | ((uint64_t) *(data + 6) << 8)
-	 | ((uint64_t) *(data + 5) << 16) | ((uint64_t) *(data + 4) << 24)
-	 | ((uint64_t) *(data + 3) << 32) | ((uint64_t) *(data + 2) << 40)
-	 | ((uint64_t) *(data + 1) << 48) | ((uint64_t) *data << 56);
+  return (uint64_t) *(data3 + 7) | ((uint64_t) *(data3 + 6) << 8)
+	 | ((uint64_t) *(data3 + 5) << 16) | ((uint64_t) *(data3 + 4) << 24)
+	 | ((uint64_t) *(data3 + 3) << 32) | ((uint64_t) *(data3 + 2) << 40)
+	 | ((uint64_t) *(data3 + 1) << 48) | ((uint64_t) *data3 << 56);
 }
 
-/* { dg-final { scan-tree-dump-times "64 bit load in host endianness found at" 3 "bswap" } } */
-/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "64 bit load in host endianness found at" 3 "bswap" { target { ! "alpha*-*-* arm*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "64 bit load in host endianness found at" 2 "bswap" { target alpha*-*-* arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" { target { ! "alpha*-*-* arm*-*-*" } } } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
index 65bff98..e2ec98e 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
@@ -6,42 +6,43 @@
 
 #include <stdint.h>
 
-unsigned char data[2];
+unsigned char data1[2] __attribute__ ((aligned (2)));
 
 struct uint16_st {
   unsigned char u0, u1;
-};
+} data2 __attribute__ ((aligned (2)));
 
 uint32_t read_le16_1 (void)
 {
-  return data[0] | (data[1] << 8);
+  return data1[0] | (data1[1] << 8);
 }
 
-uint32_t read_le16_2 (struct uint16_st data)
+uint32_t read_le16_2 (void)
 {
-  return data.u0 | (data.u1 << 8);
+  return data2.u0 | (data2.u1 << 8);
 }
 
-uint32_t read_le16_3 (unsigned char *data)
+uint32_t read_le16_3 (unsigned char *data3)
 {
-  return *data | (*(data + 1) << 8);
+  return *data3 | (*(data3 + 1) << 8);
 }
 
 uint32_t read_be16_1 (void)
 {
-  return data[1] | (data[0] << 8);
+  return data1[1] | (data1[0] << 8);
 }
 
-uint32_t read_be16_2 (struct uint16_st data)
+uint32_t read_be16_2 (void)
 {
-  return data.u1 | (data.u0 << 8);
+  return data2.u1 | (data2.u0 << 8);
 }
 
-uint32_t read_be16_3 (unsigned char *data)
+uint32_t read_be16_3 (unsigned char *data3)
 {
-  return *(data + 1) | (*data << 8);
+  return *(data3 + 1) | (*data3 << 8);
 }
 
-/* { dg-final { scan-tree-dump-times "16 bit load in host endianness found at" 3 "bswap" } } */
-/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "16 bit load in host endianness found at" 3 "bswap" { target { ! "alpha*-*-* arm*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "16 bit load in host endianness found at" 2 "bswap" { target alpha*-*-* arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 3 "bswap" { target { ! "alpha*-*-* arm*-*-*" } } } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
index 518b510..e8aa1f9 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
@@ -6,44 +6,45 @@
 
 #include <stdint.h>
 
-extern unsigned char data[4];
+extern unsigned char data1[4] __attribute__ ((aligned (4)));
 
 struct uint32_st {
   unsigned char u0, u1, u2, u3;
-};
+} data2 __attribute__ ((aligned (4)));
 
 uint32_t read_le32_1 (void)
 {
-  return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+  return data1[0] | (data1[1] << 8) | (data1[2] << 16) | (data1[3] << 24);
 }
 
-uint32_t read_le32_2 (struct uint32_st data)
+uint32_t read_le32_2 (void)
 {
-  return data.u0 | (data.u1 << 8) | (data.u2 << 16) | (data.u3 << 24);
+  return data2.u0 | (data2.u1 << 8) | (data2.u2 << 16) | (data2.u3 << 24);
 }
 
-uint32_t read_le32_3 (unsigned char *data)
+uint32_t read_le32_3 (unsigned char *data3)
 {
-  return *data | (*(data + 1) << 8) | (*(data + 2) << 16)
-	 | (*(data + 3) << 24);
+  return *data3 | (*(data3 + 1) << 8) | (*(data3 + 2) << 16)
+	 | (*(data3 + 3) << 24);
 }
 
 uint32_t read_be32_1 (void)
 {
-  return data[3] | (data[2] << 8) | (data[1] << 16) | (data[0] << 24);
+  return data1[3] | (data1[2] << 8) | (data1[1] << 16) | (data1[0] << 24);
 }
 
-uint32_t read_be32_2 (struct uint32_st data)
+uint32_t read_be32_2 (void)
 {
-  return data.u3 | (data.u2 << 8) | (data.u1 << 16) | (data.u0 << 24);
+  return data2.u3 | (data2.u2 << 8) | (data2.u1 << 16) | (data2.u0 << 24);
 }
 
-uint32_t read_be32_3 (unsigned char *data)
+uint32_t read_be32_3 (unsigned char *data3)
 {
-  return *(data + 3) | (*(data + 2) << 8) | (*(data + 1) << 16)
-	 | (*data << 24);
+  return *(data3 + 3) | (*(data3 + 2) << 8) | (*(data3 + 1) << 16)
+	 | (*data3 << 24);
 }
 
-/* { dg-final { scan-tree-dump-times "32 bit load in host endianness found at" 3 "bswap" } } */
-/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "32 bit load in host endianness found at" 3 "bswap" { target { ! "alpha*-*-* arm*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "32 bit load in host endianness found at" 2 "bswap" { target alpha*-*-* arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" { target { ! "alpha*-*-* arm*-*-*" } } } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 4c0b808..941343c 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2223,8 +2223,11 @@ bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree src, tree fndecl,
       gimple addr_stmt, load_stmt;
       unsigned align;
 
+      load_type = bswap ? bswap_type : range_type;
       align = get_object_alignment (src);
-      if (bswap && SLOW_UNALIGNED_ACCESS (TYPE_MODE (range_type), align))
+
+      if (align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
+	  && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
 	return false;
 
       /*  Compute address to load from and cast according to the size
@@ -2241,7 +2244,6 @@ bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree src, tree fndecl,
 	}
 
       /* Perform the load.  */
-      load_type = bswap ? bswap_type : range_type;
       aligned_load_type = load_type;
       if (align < TYPE_ALIGN (load_type))
 	aligned_load_type = build_aligned_type (load_type, align);
