diff --git a/gcc/expr.c b/gcc/expr.c
index fec6194..864ee20 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -6669,7 +6669,10 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
 
 /* Given an expression EXP that may be a COMPONENT_REF, a BIT_FIELD_REF,
    an ARRAY_REF, or an ARRAY_RANGE_REF, look for nested operations of these
-   codes and find the ultimate containing object, which we return.
+   codes and find the ultimate containing object, which we return.  If that
+   object is a MEM_REF whose base is a pointer rather than an ADDR_EXPR,
+   ALLOW_ADDRESS decides whether that pointer is returned instead of the
+   MEM_REF.  The handling of TARGET_MEM_REF is not affected by ALLOW_ADDRESS.
 
    We set *PBITSIZE to the size in bits that we want, *PBITPOS to the
    bit position, and *PUNSIGNEDP to the signedness of the field.
@@ -6706,7 +6709,7 @@ tree
 get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize,
 		     HOST_WIDE_INT *pbitpos, tree *poffset,
 		     enum machine_mode *pmode, int *punsignedp,
-		     int *pvolatilep, bool keep_aligning)
+		     int *pvolatilep, bool keep_aligning, bool allow_address)
 {
   tree size_tree = 0;
   enum machine_mode mode = VOIDmode;
@@ -6838,7 +6841,7 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize,
 
 	case MEM_REF:
 	  /* Hand back the decl for MEM[&decl, off].  */
-	  if (TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR)
+	  if (TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR || allow_address)
 	    {
 	      tree off = TREE_OPERAND (exp, 1);
 	      if (!integer_zerop (off))
@@ -6848,7 +6851,10 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize,
 				      ? 3 : exact_log2 (BITS_PER_UNIT));
 		  bit_offset += boff;
 		}
-	      exp = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+	      if (TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR)
+		exp = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+	      else
+		exp = TREE_OPERAND (exp, 0);
 	    }
 	  goto done;
 
diff --git a/gcc/testsuite/gcc.c-torture/execute/bswap-2.c b/gcc/testsuite/gcc.c-torture/execute/bswap-2.c
new file mode 100644
index 0000000..e91b487
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/bswap-2.c
@@ -0,0 +1,108 @@
+/* Check that the bswap pass does not turn partial or aliased byte reads
+   into a single wide load.  */
+
+#ifdef __UINT32_TYPE__
+typedef __UINT32_TYPE__ uint32_t;
+#else
+/* Fallback; main skips the test if this type is not 32 bits wide.  */
+typedef unsigned uint32_t;
+#endif
+
+/* Four 7-bit fields; how they are laid out is implementation-defined.  */
+struct bitfield {
+  unsigned char f0:7;
+  unsigned char f1:7;
+  unsigned char f2:7;
+  unsigned char f3:7;
+};
+
+/* Four whole bytes, used to fill the storage shared with the bit-fields.  */
+struct ok {
+  unsigned char f0;
+  unsigned char f1;
+  unsigned char f2;
+  unsigned char f3;
+};
+
+union bf_or_uint32 {
+  struct ok inval;
+  struct bitfield bfval;
+};
+
+/* Combine the four bit-fields of IN with f0 in the low byte.  Each field
+   is narrower than a byte, so this is not a plain 32-bit load.  */
+__attribute__ ((noinline, noclone)) uint32_t
+partial_read_le32 (union bf_or_uint32 in)
+{
+  return in.bfval.f0 | (in.bfval.f1 << 8)
+	 | (in.bfval.f2 << 16) | (in.bfval.f3 << 24);
+}
+
+/* Likewise, but with f3 in the low byte.  */
+__attribute__ ((noinline, noclone)) uint32_t
+partial_read_be32 (union bf_or_uint32 in)
+{
+  return in.bfval.f3 | (in.bfval.f2 << 8)
+	 | (in.bfval.f1 << 16) | (in.bfval.f0 << 24);
+}
+
+/* Combine X[0..3] with X[0] in the low byte, storing 1 through Y between
+   the reads of X[1] and X[2].  Y may point into X, so the four reads must
+   not be merged into one load.  */
+__attribute__ ((noinline, noclone)) uint32_t
+fake_read_le32 (char *x, char *y)
+{
+  unsigned char c0, c1, c2, c3;
+
+  c0 = x[0];
+  c1 = x[1];
+  *y = 1;
+  c2 = x[2];
+  c3 = x[3];
+  return c0 | c1 << 8 | c2 << 16 | c3 << 24;
+}
+
+/* Likewise, but with X[3] in the low byte.  */
+__attribute__ ((noinline, noclone)) uint32_t
+fake_read_be32 (char *x, char *y)
+{
+  unsigned char c0, c1, c2, c3;
+
+  c0 = x[0];
+  c1 = x[1];
+  *y = 1;
+  c2 = x[2];
+  c3 = x[3];
+  return c3 | c2 << 8 | c1 << 16 | c0 << 24;
+}
+
+int
+main ()
+{
+  union bf_or_uint32 bfin;
+  uint32_t out;
+  char cin[] = { 0x83, 0x85, 0x87, 0x89 };
+
+  if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
+    return 0;
+  /* Bit-field layout is implementation-defined: accept fields allocated
+     from the low-order bit (0x03...) or from the high-order bit (0x41...).  */
+  bfin.inval = (struct ok) { 0x83, 0x85, 0x87, 0x89 };
+  out = partial_read_le32 (bfin);
+  if (out != 0x09070503 && out != 0x88868482 && out != 0x44434241)
+    __builtin_abort ();
+  bfin.inval = (struct ok) { 0x83, 0x85, 0x87, 0x89 };
+  out = partial_read_be32 (bfin);
+  if (out != 0x03050709 && out != 0x82848688 && out != 0x41424344)
+    __builtin_abort ();
+  out = fake_read_le32 (cin, &cin[2]);
+  if (out != 0x89018583)
+    __builtin_abort ();
+  /* The call above left 1 in cin[2]; restore it so that a wrongly merged
+     load in the next function yields a value different from the expected.  */
+  cin[2] = 0x87;
+  out = fake_read_be32 (cin, &cin[2]);
+  if (out != 0x83850189)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c b/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
new file mode 100644
index 0000000..bca7b4d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target bswap64 } */
+/* { dg-require-effective-target stdint_types } */
+/* { dg-options "-O2 -fdump-tree-bswap" } */
+
+#include <stdint.h>
+
+struct uint64_st {  /* Eight byte-sized members, declared u0 first.  */
+  unsigned char u0, u1, u2, u3, u4, u5, u6, u7;
+};
+
+uint32_t read_aux (void *, uint32_t);
+
+uint64_t read_le64_1 (void)  /* Array source; data[0] is the low byte.  */
+{
+  unsigned char data[8];
+
+  read_aux (data, 8);
+  return (uint64_t) data[0] | ((uint64_t) data[1] << 8)
+	 | ((uint64_t) data[2] << 16) | ((uint64_t) data[3] << 24)
+	 | ((uint64_t) data[4] << 32) | ((uint64_t) data[5] << 40)
+	 | ((uint64_t) data[6] << 48) | ((uint64_t) data[7] << 56);
+}
+
+uint64_t read_le64_2 (void)  /* Struct source; u0 is the low byte.  */
+{
+  struct uint64_st data;
+
+  read_aux (&data, 8);
+  return (uint64_t) data.u0 | ((uint64_t) data.u1 << 8)
+	 | ((uint64_t) data.u2 << 16) | ((uint64_t) data.u3 << 24)
+	 | ((uint64_t) data.u4 << 32) | ((uint64_t) data.u5 << 40)
+	 | ((uint64_t) data.u6 << 48) | ((uint64_t) data.u7 << 56);
+}
+
+uint64_t read_le64_3 (void)  /* Pointer source; *data is the low byte.  */
+{
+  unsigned char *data;  /* NOTE(review): never assigned before it is used.  */
+
+  read_aux (data, 8);
+  return (uint64_t) *data | ((uint64_t) *(data + 1) << 8)
+	 | ((uint64_t) *(data + 2) << 16) | ((uint64_t) *(data + 3) << 24)
+	 | ((uint64_t) *(data + 4) << 32) | ((uint64_t) *(data + 5) << 40)
+	 | ((uint64_t) *(data + 6) << 48) | ((uint64_t) *(data + 7) << 56);
+}
+
+uint64_t read_be64_1 (void)  /* Array source; data[0] is the high byte.  */
+{
+  unsigned char data[8];
+
+  read_aux (data, 8);
+  return (uint64_t) data[7] | ((uint64_t) data[6] << 8)
+	 | ((uint64_t) data[5] << 16) | ((uint64_t) data[4] << 24)
+	 | ((uint64_t) data[3] << 32) | ((uint64_t) data[2] << 40)
+	 | ((uint64_t) data[1] << 48) | ((uint64_t) data[0] << 56);
+}
+
+uint64_t read_be64_2 (void)  /* Struct source; u0 is the high byte.  */
+{
+  struct uint64_st data;
+
+  read_aux (&data, 8);
+  return (uint64_t) data.u7 | ((uint64_t) data.u6 << 8)
+	 | ((uint64_t) data.u5 << 16) | ((uint64_t) data.u4 << 24)
+	 | ((uint64_t) data.u3 << 32) | ((uint64_t) data.u2 << 40)
+	 | ((uint64_t) data.u1 << 48) | ((uint64_t) data.u0 << 56);
+}
+
+uint64_t read_be64_3 (void)  /* Pointer source; *data is the high byte.  */
+{
+  unsigned char *data;  /* NOTE(review): never assigned before it is used.  */
+
+  read_aux (data, 8);
+  return (uint64_t) *(data + 7) | ((uint64_t) *(data + 6) << 8)
+	 | ((uint64_t) *(data + 5) << 16) | ((uint64_t) *(data + 4) << 24)
+	 | ((uint64_t) *(data + 3) << 32) | ((uint64_t) *(data + 2) << 40)
+	 | ((uint64_t) *(data + 1) << 48) | ((uint64_t) *data << 56);
+}
+
+/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" } } */
+/* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
new file mode 100644
index 0000000..4dcd3e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target bswap16 } */
+/* { dg-require-effective-target stdint_types } */
+/* { dg-options "-O2 -fdump-tree-bswap" } */
+/* { dg-options "-O2 -fdump-tree-bswap -march=z900" { target s390-*-* } } */
+
+#include <stdint.h>
+
+struct uint16_st {  /* Two byte-sized members, declared u0 first.  */
+  unsigned char u0, u1;
+};
+
+uint32_t read_aux (void *, uint32_t);
+
+uint32_t read_le16_1 (void)  /* Array source; data[0] is the low byte.  */
+{
+  unsigned char data[2];
+
+  read_aux (data, 2);
+  return data[0] | (data[1] << 8);
+}
+
+uint32_t read_le16_2 (void)  /* Struct source; u0 is the low byte.  */
+{
+  struct uint16_st data;
+
+  read_aux (&data, 2);
+  return data.u0 | (data.u1 << 8);
+}
+
+uint32_t read_le16_3 (void)  /* Pointer source; *data is the low byte.  */
+{
+  unsigned char *data;  /* NOTE(review): never assigned before it is used.  */
+
+  read_aux (data, 2);
+  return *data | (*(data + 1) << 8);
+}
+
+uint32_t read_be16_1 (void)  /* Array source; data[0] is the high byte.  */
+{
+  unsigned char data[2];
+
+  read_aux (data, 2);
+  return data[1] | (data[0] << 8);
+}
+
+uint32_t read_be16_2 (void)  /* Struct source; u0 is the high byte.  */
+{
+  struct uint16_st data;
+
+  read_aux (&data, 2);
+  return data.u1 | (data.u0 << 8);
+}
+
+uint32_t read_be16_3 (void)  /* Pointer source; *data is the high byte.  */
+{
+  unsigned char *data;  /* NOTE(review): never assigned before it is used.  */
+
+  read_aux (data, 2);
+  return *(data + 1) | (*data << 8);
+}
+
+/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 3 "bswap" } } */
+/* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
new file mode 100644
index 0000000..b365b96
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target bswap32 } */
+/* { dg-require-effective-target stdint_types } */
+/* { dg-options "-O2 -fdump-tree-bswap" } */
+/* { dg-options "-O2 -fdump-tree-bswap -march=z900" { target s390-*-* } } */
+
+#include <stdint.h>
+
+struct uint32_st {  /* Four byte-sized members, declared u0 first.  */
+  unsigned char u0, u1, u2, u3;
+};
+
+uint32_t read_aux (void *, uint32_t);
+
+uint32_t read_le32_1 (void)  /* Array source; data[0] is the low byte.  */
+{
+  unsigned char data[4];
+
+  read_aux (data, 4);
+  return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+}
+
+uint32_t read_le32_2 (void)  /* Struct source; u0 is the low byte.  */
+{
+  struct uint32_st data;
+
+  read_aux (&data, 4);
+  return data.u0 | (data.u1 << 8) | (data.u2 << 16) | (data.u3 << 24);
+}
+
+uint32_t read_le32_3 (void)  /* Pointer source; *data is the low byte.  */
+{
+  unsigned char *data;  /* NOTE(review): never assigned before it is used.  */
+
+  read_aux (data, 4);
+  return *data | (*(data + 1) << 8) | (*(data + 2) << 16)
+	 | (*(data + 3) << 24);
+}
+
+uint32_t read_be32_1 (void)  /* Array source; data[0] is the high byte.  */
+{
+  unsigned char data[4];
+
+  read_aux (data, 4);
+  return data[3] | (data[2] << 8) | (data[1] << 16) | (data[0] << 24);
+}
+
+uint32_t read_be32_2 (void)  /* Struct source; u0 is the high byte.  */
+{
+  struct uint32_st data;
+
+  read_aux (&data, 4);
+  return data.u3 | (data.u2 << 8) | (data.u1 << 16) | (data.u0 << 24);
+}
+
+uint32_t read_be32_3 (void)  /* Pointer source; *data is the high byte.  */
+{
+  unsigned char *data;  /* NOTE(review): never assigned before it is used.  */
+
+  read_aux (data, 4);
+  return *(data + 3) | (*(data + 2) << 8) | (*(data + 1) << 16)
+	 | (*data << 24);
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" } } */
+/* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index b965ad1..65e7ffe 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "is-a.h"
 #include "gimple.h"
 #include "gimple-iterator.h"
+#include "gimplify.h"
 #include "gimplify-me.h"
 #include "stor-layout.h"
 #include "gimple-ssa.h"
@@ -1606,11 +1607,28 @@ make_pass_cse_sincos (gcc::context *ctxt)
 
    0    - byte has the value 0
    1..size - byte contains the content of the byte
-   number indexed with that value minus one  */
+   number indexed with that value minus one.
+
+   To detect permutations on memory sources (arrays and structures), a symbolic
+   number is also associated with a base address (the array or structure the
+   load is made from), an offset from that base address and a range, which is
+   the number of bytes spanned by the memory locations accessed to make such a
+   symbolic number.  The range is thus different from the size, which reflects
+   the size of the type of the current expression.
+
+   For instance, for an array char a[], (short) a[0] | (short) a[3] would have
+   a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
+   still have a size of 2 but this time a range of 1.  */
 
 struct symbolic_number {
   unsigned HOST_WIDEST_INT n;
   int size;
+  tree base_addr;  /* Base object or pointer of the load.  */
+  tree offset;  /* Offset tree from get_inner_reference; may be NULL_TREE.  */
+  HOST_WIDE_INT bytepos;  /* Constant byte position of the access.  */
+  tree alias_set;  /* Alias pointer type of the memory reference.  */
+  tree vuse;  /* VUSE of the load; NULL_TREE for a non-memory source.  */
+  unsigned HOST_WIDE_INT range;  /* Bytes spanned by the memory accesses.  */
 };
 
 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
@@ -1672,6 +1690,37 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
   return true;
 }
 
+/* Check whether STMT is a non-volatile load that can be the memory source
+   of a byte swap and return the answer.  REF is the memory reference read by
+   STMT; on success its base, offset, alias type and VUSE are recorded in N.  */
+
+static bool
+find_bswap_load (gimple stmt, tree ref, struct symbolic_number *n)
+{
+  /* Leaf node is an array or component ref.  Memorize its base and
+     offset from base to compare to other such leaf node.  */
+  HOST_WIDE_INT bitsize, bitpos;
+  enum machine_mode mode;
+  int unsignedp, volatilep;
+
+  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
+    return false;
+
+  n->base_addr = get_inner_reference (ref, &bitsize, &bitpos, &n->offset,
+				      &mode, &unsignedp, &volatilep, false,
+				      true);
+
+  if (bitpos % BITS_PER_UNIT)
+    return false;
+  if (bitsize % BITS_PER_UNIT)
+    return false;
+
+  n->bytepos = bitpos / BITS_PER_UNIT;
+  n->alias_set = reference_alias_ptr_type (ref);
+  n->vuse = gimple_vuse (stmt);
+  return true;
+}
+
 /* find_bswap_1 invokes itself recursively with N and tries to perform
    the operation given by the rhs of STMT on the result.  If the
    operation could successfully be executed the function returns the
@@ -1691,6 +1740,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 
   rhs1 = gimple_assign_rhs1 (stmt);
 
+  if (find_bswap_load (stmt, rhs1, n))
+    return rhs1;
+
   if (TREE_CODE (rhs1) != SSA_NAME)
     return NULL_TREE;
 
@@ -1719,9 +1771,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 
       source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
 
-      /* If find_bswap_1 returned NULL STMT is a leaf node and we have
+      /* If find_bswap_1 returned NULL, STMT is a leaf node and we have
 	 to initialize the symbolic number.  */
-      if (!source_expr1)
+      if (!source_expr1 || gimple_assign_load_p (rhs1_stmt))
 	{
 	  /* Set up the symbolic number N by setting each byte to a
 	     value between 1 and the byte size of rhs1.  The highest
@@ -1731,6 +1783,7 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 	  if (n->size % BITS_PER_UNIT != 0)
 	    return NULL_TREE;
 	  n->size /= BITS_PER_UNIT;
+	  n->range = n->size;
 	  n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
 		  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
 
@@ -1738,7 +1791,11 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 	    n->n &= ((unsigned HOST_WIDEST_INT)1 <<
 		     (n->size * BITS_PER_UNIT)) - 1;
 
-	  source_expr1 = rhs1;
+	  if (!source_expr1)
+	    {
+	      n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+	      source_expr1 = rhs1;
+	    }
 	}
 
       switch (code)
@@ -1814,10 +1871,72 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 
 	  source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
 
-	  if (source_expr1 != source_expr2
-	      || n1.size != n2.size)
+	  if (n1.size != n2.size || !source_expr2)
 	    return NULL_TREE;
 
+	  if (!n1.vuse != !n2.vuse ||
+	  (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+	    return NULL_TREE;
+
+	  if (source_expr1 != source_expr2)
+	    {
+	      HOST_WIDEST_INT inc, mask;
+	      unsigned i;
+	      HOST_WIDE_INT off_sub;
+	      struct symbolic_number *n_ptr;
+
+	      if (!n1.base_addr || !n2.base_addr
+		  || !operand_equal_p (n1.base_addr, n2.base_addr, 0))
+		return NULL_TREE;
+	      if (!n1.offset != !n2.offset ||
+	          (n1.offset && !operand_equal_p (n1.offset, n2.offset, 0)))
+		return NULL_TREE;
+
+	      /* We swap n1 with n2 to have n1 < n2.  */
+	      if (n2.bytepos < n1.bytepos)
+		{
+		  struct symbolic_number tmpn;
+
+		  tmpn = n2;
+		  n2 = n1;
+		  n1 = tmpn;
+		  source_expr1 = source_expr2;
+		}
+
+	      off_sub = n2.bytepos - n1.bytepos;
+
+	      /* Check that the range of memory covered < biggest int size.  */
+	      if (off_sub + n2.range > (int) sizeof (HOST_WIDEST_INT))
+	        return NULL_TREE;
+	      n->range = n2.range + off_sub;
+
+	      /* Reinterpret byte marks in symbolic number holding the value of
+		 bigger weight according to host endianness.  */
+	      inc = BYTES_BIG_ENDIAN ? off_sub + n2.range - n1.range : off_sub;
+	      mask = 0xFF;
+	      if (BYTES_BIG_ENDIAN)
+		n_ptr = &n1;
+	      else
+		n_ptr = &n2;
+	      for (i = 0; i < sizeof (HOST_WIDEST_INT); i++, inc <<= 8,
+		   mask <<= 8)
+		{
+		  if (n_ptr->n & mask)
+		    n_ptr->n += inc;
+		}
+	    }
+	  else
+	    n->range = n1.range;
+
+	  if (!n1.alias_set
+	      || alias_ptr_types_compatible_p (n1.alias_set, n2.alias_set))
+	    n->alias_set = n1.alias_set;
+	  else
+	    n->alias_set = ptr_type_node;
+	  n->vuse = n1.vuse;
+	  n->base_addr = n1.base_addr;
+	  n->offset = n1.offset;
+	  n->bytepos = n1.bytepos;
 	  n->size = n1.size;
 	  for (i = 0, mask = 0xff; i < n->size; i++, mask <<= BITS_PER_UNIT)
 	    {
@@ -1843,14 +1962,16 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 }
 
 /* Check if STMT completes a bswap implementation consisting of ORs,
-   SHIFTs and ANDs.  Return the source tree expression on which the
-   byte swap is performed and NULL if no bswap was found.  */
+   SHIFTs and ANDs.  Return the source tree expression of the byte swap,
+   or NULL_TREE if no bswap was found.  On success, set *SIZE to the size
+   in bits of the swapped value; if the source lies in memory, also set
+   *ALIAS_SET and *VUSE from that reference, else *ALIAS_SET is NULL_TREE.  */
 
 static tree
-find_bswap (gimple stmt)
+find_bswap (gimple stmt, tree *alias_set, tree *vuse, int *size)
 {
 /* The number which the find_bswap result should match in order to
-   have a full byte swap.  The number is shifted to the left according
+   have a full byte swap.  The number is shifted to the right according
    to the size of the symbolic number before using it.  */
   unsigned HOST_WIDEST_INT cmp =
     sizeof (HOST_WIDEST_INT) < 8 ? 0 :
@@ -1858,12 +1979,12 @@ find_bswap (gimple stmt)
 
   struct symbolic_number n;
   tree source_expr;
-  int limit;
+  int limit, rsize;
 
   /* The last parameter determines the depth search limit.  It usually
      correlates directly to the number of bytes to be touched.  We
      increase that number by three  here in order to also
-     cover signed -> unsigned converions of the src operand as can be seen
+     cover signed -> unsigned conversions of the src operand as can be seen
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
@@ -1875,18 +1996,35 @@ find_bswap (gimple stmt)
   /* Zero out the extra bits of N and CMP.  */
   if (n.size < (int)sizeof (HOST_WIDEST_INT))
     {
+      int tmpn;
       unsigned HOST_WIDEST_INT mask =
 	((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
 
       n.n &= mask;
-      cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
+      /* Find real size of result (highest non zero byte).  */
+      for (tmpn = n.n, rsize = 0; tmpn; tmpn >>= BITS_PER_UNIT, rsize++);
+      cmp >>= (sizeof (HOST_WIDEST_INT) - rsize) * BITS_PER_UNIT;
     }
+  else
+    rsize = n.size;
 
   /* A complete byte swap should make the symbolic number to start
      with the largest digit in the highest order byte.  */
   if (cmp != n.n)
     return NULL_TREE;
 
+  *alias_set = NULL_TREE;
+
+  if (n.base_addr)
+    {
+      *alias_set = n.alias_set;
+      *vuse = n.vuse;
+      n.size = rsize;
+    }
+  else if (rsize != n.size)
+    return NULL_TREE;
+
+  *size = n.size * BITS_PER_UNIT;
   return source_expr;
 }
 
@@ -1984,7 +2122,7 @@ pass_optimize_bswap::execute (function *fun)
       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
         {
 	  gimple stmt = gsi_stmt (gsi);
-	  tree bswap_src, bswap_type;
+	  tree bswap_src, bswap_type, load_type, alias_type, vuse = NULL_TREE;
 	  tree bswap_tmp;
 	  tree fndecl = NULL_TREE;
 	  int type_size;
@@ -1994,11 +2132,15 @@ pass_optimize_bswap::execute (function *fun)
 	      || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
 	    continue;
 
-	  type_size = TYPE_PRECISION (gimple_expr_type (stmt));
+	  bswap_src = find_bswap (stmt, &alias_type, &vuse, &type_size);
+
+	  if (!bswap_src)
+	    continue;
 
 	  switch (type_size)
 	    {
 	    case 16:
+	      load_type = uint16_type_node;
 	      if (bswap16_p)
 		{
 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
@@ -2006,6 +2148,7 @@ pass_optimize_bswap::execute (function *fun)
 		}
 	      break;
 	    case 32:
+	      load_type = uint32_type_node;
 	      if (bswap32_p)
 		{
 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
@@ -2013,6 +2156,7 @@ pass_optimize_bswap::execute (function *fun)
 		}
 	      break;
 	    case 64:
+	      load_type = uint64_type_node;
 	      if (bswap64_p)
 		{
 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
@@ -2026,10 +2170,39 @@ pass_optimize_bswap::execute (function *fun)
 	  if (!fndecl)
 	    continue;
 
-	  bswap_src = find_bswap (stmt);
 
-	  if (!bswap_src)
-	    continue;
+	  /* Need to load the value from memory first.  */
+	  if (alias_type)
+	    {
+	      tree addr_expr, addr_tmp, val_expr, val_tmp;
+	      tree load_offset_ptr;
+	      gimple addr_stmt, load_stmt;
+
+	      changed = true;
+
+	      /*  Compute address to load from and cast according to the size
+		  of the load.  */
+	      addr_expr = build_fold_addr_expr (unshare_expr (bswap_src));
+	      if (is_gimple_min_invariant (addr_expr))
+		addr_tmp = addr_expr;
+	      else
+		{
+		  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
+						 "load_src");
+		  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
+		  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+		}
+
+	      /* Perform the load.  */
+	      load_offset_ptr = build_int_cst (alias_type, 0);
+	      val_tmp = make_temp_ssa_name (load_type, NULL, "load_dst");
+	      val_expr = fold_build2 (MEM_REF, load_type, addr_tmp,
+				      load_offset_ptr);
+	      load_stmt = gimple_build_assign (val_tmp, val_expr);
+	      gimple_set_vuse (load_stmt, vuse);
+	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+	      bswap_src = val_tmp;
+	    }
 
 	  changed = true;
 	  if (type_size == 16)
diff --git a/gcc/tree.h b/gcc/tree.h
index ae4876d..0cb2abe 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4539,7 +4539,7 @@ extern tree build_personality_function (const char *);
    the access position and size.  */
 extern tree get_inner_reference (tree, HOST_WIDE_INT *, HOST_WIDE_INT *,
 				 tree *, enum machine_mode *, int *, int *,
-				 bool);
+				 bool, bool = false);
 
 /* Return a tree representing the lower bound of the array mentioned in
    EXP, an ARRAY_REF or an ARRAY_RANGE_REF.  */
