pengfei updated this revision to Diff 468020.
pengfei added a comment.

Add lowering of atomic operations to the RAO-INT instructions.
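
For reviewers, a minimal sketch of what the new lowering does, as exercised by the attached atomic-instructions-*.ll tests: with -mattr=+raoint, an atomicrmw add/and/or/xor whose result is unused is selected to the corresponding RAO-INT instruction (aadd/aand/aor/axor), with mfence instructions emitted around the stronger memory orderings, while an atomicrmw whose result is used still falls back to lock xadd or a cmpxchg loop. The function name below is illustrative only; the llc flags are taken from the tests' RUN lines.

  ; Sketch (typed pointers, matching the tests); compile with:
  ;   llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+raoint
  define void @sketch_relaxed_add(i64* %p, i64 %v) nounwind {
    ; Result is unused, so this is expected to select to something like
    ;   aaddq %rsi, (%rdi)
    ; (modulo register allocation) rather than lock addq.
    %old = atomicrmw add i64* %p, i64 %v monotonic
    ret void
  }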


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135951/new/

https://reviews.llvm.org/D135951

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/cpuid.h
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrRAOINT.td
  llvm/test/CodeGen/X86/atomic-instructions-32.ll
  llvm/test/CodeGen/X86/atomic-instructions-64.ll
  llvm/test/MC/Disassembler/X86/rao-int.txt
  llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt
  llvm/test/MC/X86/rao-int-att.s
  llvm/test/MC/X86/rao-int-intel.s
  llvm/test/MC/X86/x86-64-rao-int-att.s
  llvm/test/MC/X86/x86-64-rao-int-intel.s

Index: llvm/test/MC/X86/x86-64-rao-int-intel.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/x86-64-rao-int-intel.s
@@ -0,0 +1,193 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      aadd qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               aadd qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK:      aadd qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               aadd qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK:      aadd qword ptr [rip], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               aadd qword ptr [rip], r9
+
+// CHECK:      aadd qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               aadd qword ptr [2*rbp - 512], r9
+
+// CHECK:      aadd qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               aadd qword ptr [rcx + 2032], r9
+
+// CHECK:      aadd qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               aadd qword ptr [rdx - 2048], r9
+
+// CHECK:      aadd dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aadd dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      aadd dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aadd dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      aadd dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x18]
+               aadd dword ptr [eax], ebx
+
+// CHECK:      aadd dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aadd dword ptr [2*ebp - 512], ebx
+
+// CHECK:      aadd dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aadd dword ptr [ecx + 2032], ebx
+
+// CHECK:      aadd dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aadd dword ptr [edx - 2048], ebx
+
+// CHECK:      aand qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               aand qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK:      aand qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               aand qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK:      aand qword ptr [rip], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               aand qword ptr [rip], r9
+
+// CHECK:      aand qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               aand qword ptr [2*rbp - 512], r9
+
+// CHECK:      aand qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               aand qword ptr [rcx + 2032], r9
+
+// CHECK:      aand qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               aand qword ptr [rdx - 2048], r9
+
+// CHECK:      aand dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aand dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      aand dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aand dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      aand dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x18]
+               aand dword ptr [eax], ebx
+
+// CHECK:      aand dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aand dword ptr [2*ebp - 512], ebx
+
+// CHECK:      aand dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aand dword ptr [ecx + 2032], ebx
+
+// CHECK:      aand dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aand dword ptr [edx - 2048], ebx
+
+// CHECK:      aor qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               aor qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK:      aor qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               aor qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK:      aor qword ptr [rip], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               aor qword ptr [rip], r9
+
+// CHECK:      aor qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               aor qword ptr [2*rbp - 512], r9
+
+// CHECK:      aor qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               aor qword ptr [rcx + 2032], r9
+
+// CHECK:      aor qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               aor qword ptr [rdx - 2048], r9
+
+// CHECK:      aor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      aor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      aor dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x18]
+               aor dword ptr [eax], ebx
+
+// CHECK:      aor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aor dword ptr [2*ebp - 512], ebx
+
+// CHECK:      aor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aor dword ptr [ecx + 2032], ebx
+
+// CHECK:      aor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aor dword ptr [edx - 2048], ebx
+
+// CHECK:      axor qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               axor qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK:      axor qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               axor qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK:      axor qword ptr [rip], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               axor qword ptr [rip], r9
+
+// CHECK:      axor qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               axor qword ptr [2*rbp - 512], r9
+
+// CHECK:      axor qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               axor qword ptr [rcx + 2032], r9
+
+// CHECK:      axor qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               axor qword ptr [rdx - 2048], r9
+
+// CHECK:      axor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               axor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      axor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               axor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      axor dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x18]
+               axor dword ptr [eax], ebx
+
+// CHECK:      axor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               axor dword ptr [2*ebp - 512], ebx
+
+// CHECK:      axor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               axor dword ptr [ecx + 2032], ebx
+
+// CHECK:      axor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               axor dword ptr [edx - 2048], ebx
Index: llvm/test/MC/X86/x86-64-rao-int-att.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/x86-64-rao-int-att.s
@@ -0,0 +1,193 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      aaddq  %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               aaddq  %r9, 268435456(%rbp,%r14,8)
+
+// CHECK:      aaddq  %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               aaddq  %r9, 291(%r8,%rax,4)
+
+// CHECK:      aaddq  %r9, (%rip)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               aaddq  %r9, (%rip)
+
+// CHECK:      aaddq  %r9, -512(,%rbp,2)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               aaddq  %r9, -512(,%rbp,2)
+
+// CHECK:      aaddq  %r9, 2032(%rcx)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               aaddq  %r9, 2032(%rcx)
+
+// CHECK:      aaddq  %r9, -2048(%rdx)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               aaddq  %r9, -2048(%rdx)
+
+// CHECK:      aaddl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aaddl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      aaddl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aaddl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      aaddl  %ebx, (%eax)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x18]
+               aaddl  %ebx, (%eax)
+
+// CHECK:      aaddl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aaddl  %ebx, -512(,%ebp,2)
+
+// CHECK:      aaddl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aaddl  %ebx, 2032(%ecx)
+
+// CHECK:      aaddl  %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aaddl  %ebx, -2048(%edx)
+
+// CHECK:      aandq  %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               aandq  %r9, 268435456(%rbp,%r14,8)
+
+// CHECK:      aandq  %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               aandq  %r9, 291(%r8,%rax,4)
+
+// CHECK:      aandq  %r9, (%rip)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               aandq  %r9, (%rip)
+
+// CHECK:      aandq  %r9, -512(,%rbp,2)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               aandq  %r9, -512(,%rbp,2)
+
+// CHECK:      aandq  %r9, 2032(%rcx)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               aandq  %r9, 2032(%rcx)
+
+// CHECK:      aandq  %r9, -2048(%rdx)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               aandq  %r9, -2048(%rdx)
+
+// CHECK:      aandl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aandl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      aandl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aandl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      aandl  %ebx, (%eax)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x18]
+               aandl  %ebx, (%eax)
+
+// CHECK:      aandl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aandl  %ebx, -512(,%ebp,2)
+
+// CHECK:      aandl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aandl  %ebx, 2032(%ecx)
+
+// CHECK:      aandl  %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aandl  %ebx, -2048(%edx)
+
+// CHECK:      aorq  %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               aorq  %r9, 268435456(%rbp,%r14,8)
+
+// CHECK:      aorq  %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               aorq  %r9, 291(%r8,%rax,4)
+
+// CHECK:      aorq  %r9, (%rip)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               aorq  %r9, (%rip)
+
+// CHECK:      aorq  %r9, -512(,%rbp,2)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               aorq  %r9, -512(,%rbp,2)
+
+// CHECK:      aorq  %r9, 2032(%rcx)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               aorq  %r9, 2032(%rcx)
+
+// CHECK:      aorq  %r9, -2048(%rdx)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               aorq  %r9, -2048(%rdx)
+
+// CHECK:      aorl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aorl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      aorl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aorl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      aorl  %ebx, (%eax)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x18]
+               aorl  %ebx, (%eax)
+
+// CHECK:      aorl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aorl  %ebx, -512(,%ebp,2)
+
+// CHECK:      aorl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aorl  %ebx, 2032(%ecx)
+
+// CHECK:      aorl  %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aorl  %ebx, -2048(%edx)
+
+// CHECK:      axorq  %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+               axorq  %r9, 268435456(%rbp,%r14,8)
+
+// CHECK:      axorq  %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+               axorq  %r9, 291(%r8,%rax,4)
+
+// CHECK:      axorq  %r9, (%rip)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+               axorq  %r9, (%rip)
+
+// CHECK:      axorq  %r9, -512(,%rbp,2)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+               axorq  %r9, -512(,%rbp,2)
+
+// CHECK:      axorq  %r9, 2032(%rcx)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+               axorq  %r9, 2032(%rcx)
+
+// CHECK:      axorq  %r9, -2048(%rdx)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+               axorq  %r9, -2048(%rdx)
+
+// CHECK:      axorl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               axorl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      axorl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               axorl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      axorl  %ebx, (%eax)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x18]
+               axorl  %ebx, (%eax)
+
+// CHECK:      axorl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               axorl  %ebx, -512(,%ebp,2)
+
+// CHECK:      axorl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               axorl  %ebx, 2032(%ecx)
+
+// CHECK:      axorl  %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               axorl  %ebx, -2048(%edx)
Index: llvm/test/MC/X86/rao-int-intel.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/rao-int-intel.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      aadd dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aadd dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      aadd dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aadd dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      aadd dword ptr [eax], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x18]
+               aadd dword ptr [eax], ebx
+
+// CHECK:      aadd dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aadd dword ptr [2*ebp - 512], ebx
+
+// CHECK:      aadd dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aadd dword ptr [ecx + 2032], ebx
+
+// CHECK:      aadd dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aadd dword ptr [edx - 2048], ebx
+
+// CHECK:      aand dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aand dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      aand dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aand dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      aand dword ptr [eax], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x18]
+               aand dword ptr [eax], ebx
+
+// CHECK:      aand dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aand dword ptr [2*ebp - 512], ebx
+
+// CHECK:      aand dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aand dword ptr [ecx + 2032], ebx
+
+// CHECK:      aand dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aand dword ptr [edx - 2048], ebx
+
+// CHECK:      aor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      aor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      aor dword ptr [eax], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x18]
+               aor dword ptr [eax], ebx
+
+// CHECK:      aor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aor dword ptr [2*ebp - 512], ebx
+
+// CHECK:      aor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aor dword ptr [ecx + 2032], ebx
+
+// CHECK:      aor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aor dword ptr [edx - 2048], ebx
+
+// CHECK:      axor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               axor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK:      axor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               axor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK:      axor dword ptr [eax], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x18]
+               axor dword ptr [eax], ebx
+
+// CHECK:      axor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               axor dword ptr [2*ebp - 512], ebx
+
+// CHECK:      axor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               axor dword ptr [ecx + 2032], ebx
+
+// CHECK:      axor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               axor dword ptr [edx - 2048], ebx
Index: llvm/test/MC/X86/rao-int-att.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/rao-int-att.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      aaddl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aaddl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      aaddl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aaddl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      aaddl  %ebx, (%eax)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x18]
+               aaddl  %ebx, (%eax)
+
+// CHECK:      aaddl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aaddl  %ebx, -512(,%ebp,2)
+
+// CHECK:      aaddl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aaddl  %ebx, 2032(%ecx)
+
+// CHECK:      aaddl  %ebx, -2048(%edx)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aaddl  %ebx, -2048(%edx)
+
+// CHECK:      aandl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aandl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      aandl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aandl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      aandl  %ebx, (%eax)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x18]
+               aandl  %ebx, (%eax)
+
+// CHECK:      aandl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aandl  %ebx, -512(,%ebp,2)
+
+// CHECK:      aandl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aandl  %ebx, 2032(%ecx)
+
+// CHECK:      aandl  %ebx, -2048(%edx)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aandl  %ebx, -2048(%edx)
+
+// CHECK:      aorl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               aorl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      aorl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               aorl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      aorl  %ebx, (%eax)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x18]
+               aorl  %ebx, (%eax)
+
+// CHECK:      aorl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               aorl  %ebx, -512(,%ebp,2)
+
+// CHECK:      aorl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               aorl  %ebx, 2032(%ecx)
+
+// CHECK:      aorl  %ebx, -2048(%edx)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               aorl  %ebx, -2048(%edx)
+
+// CHECK:      axorl  %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+               axorl  %ebx, 268435456(%esp,%esi,8)
+
+// CHECK:      axorl  %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+               axorl  %ebx, 291(%edi,%eax,4)
+
+// CHECK:      axorl  %ebx, (%eax)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x18]
+               axorl  %ebx, (%eax)
+
+// CHECK:      axorl  %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+               axorl  %ebx, -512(,%ebp,2)
+
+// CHECK:      axorl  %ebx, 2032(%ecx)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+               axorl  %ebx, 2032(%ecx)
+
+// CHECK:      axorl  %ebx, -2048(%edx)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+               axorl  %ebx, -2048(%edx)
Index: llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt
@@ -0,0 +1,194 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        aaddq  %r9, 268435456(%rbp,%r14,8)
+# INTEL:      aadd qword ptr [rbp + 8*r14 + 268435456], r9
+0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        aaddq  %r9, 291(%r8,%rax,4)
+# INTEL:      aadd qword ptr [r8 + 4*rax + 291], r9
+0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        aaddq  %r9, (%rip)
+# INTEL:      aadd qword ptr [rip], r9
+0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT:        aaddq  %r9, -512(,%rbp,2)
+# INTEL:      aadd qword ptr [2*rbp - 512], r9
+0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aaddq  %r9, 2032(%rcx)
+# INTEL:      aadd qword ptr [rcx + 2032], r9
+0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT:        aaddq  %r9, -2048(%rdx)
+# INTEL:      aadd qword ptr [rdx - 2048], r9
+0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT:        aaddl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      aadd dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        aaddl  %ebx, 291(%edi,%eax,4)
+# INTEL:      aadd dword ptr [edi + 4*eax + 291], ebx
+0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        aaddl  %ebx, (%eax)
+# INTEL:      aadd dword ptr [eax], ebx
+0x67,0x0f,0x38,0xfc,0x18
+
+# ATT:        aaddl  %ebx, -512(,%ebp,2)
+# INTEL:      aadd dword ptr [2*ebp - 512], ebx
+0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aaddl  %ebx, 2032(%ecx)
+# INTEL:      aadd dword ptr [ecx + 2032], ebx
+0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        aaddl  %ebx, -2048(%edx)
+# INTEL:      aadd dword ptr [edx - 2048], ebx
+0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT:        aandq  %r9, 268435456(%rbp,%r14,8)
+# INTEL:      aand qword ptr [rbp + 8*r14 + 268435456], r9
+0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        aandq  %r9, 291(%r8,%rax,4)
+# INTEL:      aand qword ptr [r8 + 4*rax + 291], r9
+0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        aandq  %r9, (%rip)
+# INTEL:      aand qword ptr [rip], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT:        aandq  %r9, -512(,%rbp,2)
+# INTEL:      aand qword ptr [2*rbp - 512], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aandq  %r9, 2032(%rcx)
+# INTEL:      aand qword ptr [rcx + 2032], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT:        aandq  %r9, -2048(%rdx)
+# INTEL:      aand qword ptr [rdx - 2048], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT:        aandl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      aand dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        aandl  %ebx, 291(%edi,%eax,4)
+# INTEL:      aand dword ptr [edi + 4*eax + 291], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        aandl  %ebx, (%eax)
+# INTEL:      aand dword ptr [eax], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x18
+
+# ATT:        aandl  %ebx, -512(,%ebp,2)
+# INTEL:      aand dword ptr [2*ebp - 512], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aandl  %ebx, 2032(%ecx)
+# INTEL:      aand dword ptr [ecx + 2032], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        aandl  %ebx, -2048(%edx)
+# INTEL:      aand dword ptr [edx - 2048], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT:        aorq  %r9, 268435456(%rbp,%r14,8)
+# INTEL:      aor qword ptr [rbp + 8*r14 + 268435456], r9
+0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        aorq  %r9, 291(%r8,%rax,4)
+# INTEL:      aor qword ptr [r8 + 4*rax + 291], r9
+0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        aorq  %r9, (%rip)
+# INTEL:      aor qword ptr [rip], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT:        aorq  %r9, -512(,%rbp,2)
+# INTEL:      aor qword ptr [2*rbp - 512], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aorq  %r9, 2032(%rcx)
+# INTEL:      aor qword ptr [rcx + 2032], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT:        aorq  %r9, -2048(%rdx)
+# INTEL:      aor qword ptr [rdx - 2048], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT:        aorl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      aor dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        aorl  %ebx, 291(%edi,%eax,4)
+# INTEL:      aor dword ptr [edi + 4*eax + 291], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        aorl  %ebx, (%eax)
+# INTEL:      aor dword ptr [eax], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x18
+
+# ATT:        aorl  %ebx, -512(,%ebp,2)
+# INTEL:      aor dword ptr [2*ebp - 512], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aorl  %ebx, 2032(%ecx)
+# INTEL:      aor dword ptr [ecx + 2032], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        aorl  %ebx, -2048(%edx)
+# INTEL:      aor dword ptr [edx - 2048], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT:        axorq  %r9, 268435456(%rbp,%r14,8)
+# INTEL:      axor qword ptr [rbp + 8*r14 + 268435456], r9
+0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        axorq  %r9, 291(%r8,%rax,4)
+# INTEL:      axor qword ptr [r8 + 4*rax + 291], r9
+0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        axorq  %r9, (%rip)
+# INTEL:      axor qword ptr [rip], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT:        axorq  %r9, -512(,%rbp,2)
+# INTEL:      axor qword ptr [2*rbp - 512], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        axorq  %r9, 2032(%rcx)
+# INTEL:      axor qword ptr [rcx + 2032], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT:        axorq  %r9, -2048(%rdx)
+# INTEL:      axor qword ptr [rdx - 2048], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT:        axorl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      axor dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        axorl  %ebx, 291(%edi,%eax,4)
+# INTEL:      axor dword ptr [edi + 4*eax + 291], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        axorl  %ebx, (%eax)
+# INTEL:      axor dword ptr [eax], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x18
+
+# ATT:        axorl  %ebx, -512(,%ebp,2)
+# INTEL:      axor dword ptr [2*ebp - 512], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        axorl  %ebx, 2032(%ecx)
+# INTEL:      axor dword ptr [ecx + 2032], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        axorl  %ebx, -2048(%edx)
+# INTEL:      axor dword ptr [edx - 2048], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
Index: llvm/test/MC/Disassembler/X86/rao-int.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/rao-int.txt
@@ -0,0 +1,98 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        aaddl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      aadd dword ptr [esp + 8*esi + 268435456], ebx
+0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        aaddl  %ebx, 291(%edi,%eax,4)
+# INTEL:      aadd dword ptr [edi + 4*eax + 291], ebx
+0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        aaddl  %ebx, (%eax)
+# INTEL:      aadd dword ptr [eax], ebx
+0x0f,0x38,0xfc,0x18
+
+# ATT:        aaddl  %ebx, -512(,%ebp,2)
+# INTEL:      aadd dword ptr [2*ebp - 512], ebx
+0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aaddl  %ebx, 2032(%ecx)
+# INTEL:      aadd dword ptr [ecx + 2032], ebx
+0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        aaddl  %ebx, -2048(%edx)
+# INTEL:      aadd dword ptr [edx - 2048], ebx
+0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT:        aandl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      aand dword ptr [esp + 8*esi + 268435456], ebx
+0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        aandl  %ebx, 291(%edi,%eax,4)
+# INTEL:      aand dword ptr [edi + 4*eax + 291], ebx
+0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        aandl  %ebx, (%eax)
+# INTEL:      aand dword ptr [eax], ebx
+0x66,0x0f,0x38,0xfc,0x18
+
+# ATT:        aandl  %ebx, -512(,%ebp,2)
+# INTEL:      aand dword ptr [2*ebp - 512], ebx
+0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aandl  %ebx, 2032(%ecx)
+# INTEL:      aand dword ptr [ecx + 2032], ebx
+0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        aandl  %ebx, -2048(%edx)
+# INTEL:      aand dword ptr [edx - 2048], ebx
+0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT:        aorl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      aor dword ptr [esp + 8*esi + 268435456], ebx
+0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        aorl  %ebx, 291(%edi,%eax,4)
+# INTEL:      aor dword ptr [edi + 4*eax + 291], ebx
+0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        aorl  %ebx, (%eax)
+# INTEL:      aor dword ptr [eax], ebx
+0xf2,0x0f,0x38,0xfc,0x18
+
+# ATT:        aorl  %ebx, -512(,%ebp,2)
+# INTEL:      aor dword ptr [2*ebp - 512], ebx
+0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        aorl  %ebx, 2032(%ecx)
+# INTEL:      aor dword ptr [ecx + 2032], ebx
+0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        aorl  %ebx, -2048(%edx)
+# INTEL:      aor dword ptr [edx - 2048], ebx
+0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT:        axorl  %ebx, 268435456(%esp,%esi,8)
+# INTEL:      axor dword ptr [esp + 8*esi + 268435456], ebx
+0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        axorl  %ebx, 291(%edi,%eax,4)
+# INTEL:      axor dword ptr [edi + 4*eax + 291], ebx
+0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        axorl  %ebx, (%eax)
+# INTEL:      axor dword ptr [eax], ebx
+0xf3,0x0f,0x38,0xfc,0x18
+
+# ATT:        axorl  %ebx, -512(,%ebp,2)
+# INTEL:      axor dword ptr [2*ebp - 512], ebx
+0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        axorl  %ebx, 2032(%ecx)
+# INTEL:      axor dword ptr [ecx + 2032], ebx
+0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT:        axorl  %ebx, -2048(%edx)
+# INTEL:      axor dword ptr [edx - 2048], ebx
+0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
Index: llvm/test/CodeGen/X86/atomic-instructions-64.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/atomic-instructions-64.ll
@@ -0,0 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=RAO-INT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=NO-RAOINT
+
+define i64 @atomic_add64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_add64:
+; RAO-INT:       # %bb.0:
+; RAO-INT-NEXT:    movq %rsi, %rax
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    lock xaddq %rax, (%rdi)
+; RAO-INT-NEXT:    retq
+;
+; NO-RAOINT-LABEL: atomic_add64:
+; NO-RAOINT:       # %bb.0:
+; NO-RAOINT-NEXT:    movq %rsi, %rax
+; NO-RAOINT-NEXT:    lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock xaddq %rax, (%rdi)
+; NO-RAOINT-NEXT:    retq
+  %1 = atomicrmw add i64* %p, i64 %val monotonic
+  %2 = atomicrmw add i64* %p, i64 %val acquire
+  %3 = atomicrmw add i64* %p, i64 %val release
+  %4 = atomicrmw add i64* %p, i64 %val acq_rel
+  %5 = atomicrmw add i64* %p, i64 %val seq_cst
+
+  %6 = atomicrmw add i64* %p, i64 %val seq_cst
+  ret i64 %6
+}
+
+define i64 @atomic_or64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_or64:
+; RAO-INT:       # %bb.0:
+; RAO-INT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    aorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aorq %rsi, (%rdi)
+; RAO-INT-NEXT:    movq (%rdi), %rax
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:  .LBB1_1: # %atomicrmw.start
+; RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; RAO-INT-NEXT:    movq %rax, %rdx
+; RAO-INT-NEXT:    orq %rsi, %rdx
+; RAO-INT-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; RAO-INT-NEXT:    sete %cl
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    testb $1, %cl
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    jne .LBB1_2
+; RAO-INT-NEXT:    jmp .LBB1_1
+; RAO-INT-NEXT:  .LBB1_2: # %atomicrmw.end
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT:    retq
+;
+; NO-RAOINT-LABEL: atomic_or64:
+; NO-RAOINT:       # %bb.0:
+; NO-RAOINT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    movq (%rdi), %rax
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:  .LBB1_1: # %atomicrmw.start
+; NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; NO-RAOINT-NEXT:    movq %rax, %rdx
+; NO-RAOINT-NEXT:    orq %rsi, %rdx
+; NO-RAOINT-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; NO-RAOINT-NEXT:    sete %cl
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    testb $1, %cl
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    jne .LBB1_2
+; NO-RAOINT-NEXT:    jmp .LBB1_1
+; NO-RAOINT-NEXT:  .LBB1_2: # %atomicrmw.end
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT:    retq
+  %1 = atomicrmw or i64* %p, i64 %val monotonic
+  %2 = atomicrmw or i64* %p, i64 %val acquire
+  %3 = atomicrmw or i64* %p, i64 %val release
+  %4 = atomicrmw or i64* %p, i64 %val acq_rel
+  %5 = atomicrmw or i64* %p, i64 %val seq_cst
+
+  %6 = atomicrmw or i64* %p, i64 %val seq_cst
+  ret i64 %6
+}
+
+define i64 @atomic_xor64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_xor64:
+; RAO-INT:       # %bb.0:
+; RAO-INT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    axorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    axorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    axorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    axorq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    axorq %rsi, (%rdi)
+; RAO-INT-NEXT:    movq (%rdi), %rax
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:  .LBB2_1: # %atomicrmw.start
+; RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; RAO-INT-NEXT:    movq %rax, %rdx
+; RAO-INT-NEXT:    xorq %rsi, %rdx
+; RAO-INT-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; RAO-INT-NEXT:    sete %cl
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    testb $1, %cl
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    jne .LBB2_2
+; RAO-INT-NEXT:    jmp .LBB2_1
+; RAO-INT-NEXT:  .LBB2_2: # %atomicrmw.end
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT:    retq
+;
+; NO-RAOINT-LABEL: atomic_xor64:
+; NO-RAOINT:       # %bb.0:
+; NO-RAOINT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    movq (%rdi), %rax
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:  .LBB2_1: # %atomicrmw.start
+; NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; NO-RAOINT-NEXT:    movq %rax, %rdx
+; NO-RAOINT-NEXT:    xorq %rsi, %rdx
+; NO-RAOINT-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; NO-RAOINT-NEXT:    sete %cl
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    testb $1, %cl
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    jne .LBB2_2
+; NO-RAOINT-NEXT:    jmp .LBB2_1
+; NO-RAOINT-NEXT:  .LBB2_2: # %atomicrmw.end
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT:    retq
+  %1 = atomicrmw xor i64* %p, i64 %val monotonic
+  %2 = atomicrmw xor i64* %p, i64 %val acquire
+  %3 = atomicrmw xor i64* %p, i64 %val release
+  %4 = atomicrmw xor i64* %p, i64 %val acq_rel
+  %5 = atomicrmw xor i64* %p, i64 %val seq_cst
+
+  %6 = atomicrmw xor i64* %p, i64 %val seq_cst
+  ret i64 %6
+}
+
+define i64 @atomic_and64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_and64:
+; RAO-INT:       # %bb.0:
+; RAO-INT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    aandq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aandq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aandq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aandq %rsi, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aandq %rsi, (%rdi)
+; RAO-INT-NEXT:    movq (%rdi), %rax
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:  .LBB3_1: # %atomicrmw.start
+; RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; RAO-INT-NEXT:    movq %rax, %rdx
+; RAO-INT-NEXT:    andq %rsi, %rdx
+; RAO-INT-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; RAO-INT-NEXT:    sete %cl
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    testb $1, %cl
+; RAO-INT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT:    jne .LBB3_2
+; RAO-INT-NEXT:    jmp .LBB3_1
+; RAO-INT-NEXT:  .LBB3_2: # %atomicrmw.end
+; RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT:    retq
+;
+; NO-RAOINT-LABEL: atomic_and64:
+; NO-RAOINT:       # %bb.0:
+; NO-RAOINT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT:    movq (%rdi), %rax
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:  .LBB3_1: # %atomicrmw.start
+; NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; NO-RAOINT-NEXT:    movq %rax, %rdx
+; NO-RAOINT-NEXT:    andq %rsi, %rdx
+; NO-RAOINT-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; NO-RAOINT-NEXT:    sete %cl
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    testb $1, %cl
+; NO-RAOINT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT:    jne .LBB3_2
+; NO-RAOINT-NEXT:    jmp .LBB3_1
+; NO-RAOINT-NEXT:  .LBB3_2: # %atomicrmw.end
+; NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT:    retq
+  %1 = atomicrmw and i64* %p, i64 %val monotonic
+  %2 = atomicrmw and i64* %p, i64 %val acquire
+  %3 = atomicrmw and i64* %p, i64 %val release
+  %4 = atomicrmw and i64* %p, i64 %val acq_rel
+  %5 = atomicrmw and i64* %p, i64 %val seq_cst
+
+  %6 = atomicrmw and i64* %p, i64 %val seq_cst
+  ret i64 %6
+}
+
+define i64 @atomic_sub64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_sub64:
+; RAO-INT:       # %bb.0:
+; RAO-INT-NEXT:    movq %rsi, %rax
+; RAO-INT-NEXT:    negq %rax
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    mfence
+; RAO-INT-NEXT:    aaddq %rax, (%rdi)
+; RAO-INT-NEXT:    lock xaddq %rax, (%rdi)
+; RAO-INT-NEXT:    retq
+;
+; NO-RAOINT-LABEL: atomic_sub64:
+; NO-RAOINT:       # %bb.0:
+; NO-RAOINT-NEXT:    movq %rsi, %rax
+; NO-RAOINT-NEXT:    lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT:    lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT:    negq %rax
+; NO-RAOINT-NEXT:    lock xaddq %rax, (%rdi)
+; NO-RAOINT-NEXT:    retq
+  %1 = atomicrmw sub i64* %p, i64 %val monotonic
+  %2 = atomicrmw sub i64* %p, i64 %val acquire
+  %3 = atomicrmw sub i64* %p, i64 %val release
+  %4 = atomicrmw sub i64* %p, i64 %val acq_rel
+  %5 = atomicrmw sub i64* %p, i64 %val seq_cst
+
+  %6 = atomicrmw sub i64* %p, i64 %val seq_cst
+  ret i64 %6
+}
Index: llvm/test/CodeGen/X86/atomic-instructions-32.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/atomic-instructions-32.ll
@@ -0,0 +1,575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86-NO-RAOINT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X86-RAO-INT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64-NO-RAOINT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X64-RAO-INT
+
+define i32 @atomic_add32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_add32:
+; X86-NO-RAOINT:       # %bb.0:
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock xaddl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    retl
+;
+; X86-RAO-INT-LABEL: atomic_add32:
+; X86-RAO-INT:       # %bb.0:
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    lock xaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    retl
+;
+; X64-NO-RAOINT-LABEL: atomic_add32:
+; X64-NO-RAOINT:       # %bb.0:
+; X64-NO-RAOINT-NEXT:    movl %esi, %eax
+; X64-NO-RAOINT-NEXT:    lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock xaddl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    retq
+;
+; X64-RAO-INT-LABEL: atomic_add32:
+; X64-RAO-INT:       # %bb.0:
+; X64-RAO-INT-NEXT:    movl %esi, %eax
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    lock xaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    retq
+  %1 = atomicrmw add i32* %p, i32 %val monotonic
+  %2 = atomicrmw add i32* %p, i32 %val acquire
+  %3 = atomicrmw add i32* %p, i32 %val release
+  %4 = atomicrmw add i32* %p, i32 %val acq_rel
+  %5 = atomicrmw add i32* %p, i32 %val seq_cst
+
+  %6 = atomicrmw add i32* %p, i32 %val seq_cst
+  ret i32 %6
+}
+
+define i32 @atomic_or32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_or32:
+; X86-NO-RAOINT:       # %bb.0:
+; X86-NO-RAOINT-NEXT:    pushl %esi
+; X86-NO-RAOINT-NEXT:    subl $16, %esp
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    movl (%eax), %eax
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:  .LBB1_1: # %atomicrmw.start
+; X86-NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl %eax, %edx
+; X86-NO-RAOINT-NEXT:    orl %esi, %edx
+; X86-NO-RAOINT-NEXT:    lock cmpxchgl %edx, (%ecx)
+; X86-NO-RAOINT-NEXT:    sete %cl
+; X86-NO-RAOINT-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    testb $1, %cl
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    jne .LBB1_2
+; X86-NO-RAOINT-NEXT:    jmp .LBB1_1
+; X86-NO-RAOINT-NEXT:  .LBB1_2: # %atomicrmw.end
+; X86-NO-RAOINT-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    addl $16, %esp
+; X86-NO-RAOINT-NEXT:    popl %esi
+; X86-NO-RAOINT-NEXT:    retl
+;
+; X86-RAO-INT-LABEL: atomic_or32:
+; X86-RAO-INT:       # %bb.0:
+; X86-RAO-INT-NEXT:    pushl %esi
+; X86-RAO-INT-NEXT:    subl $16, %esp
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    movl (%eax), %eax
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:  .LBB1_1: # %atomicrmw.start
+; X86-RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl %eax, %edx
+; X86-RAO-INT-NEXT:    orl %esi, %edx
+; X86-RAO-INT-NEXT:    lock cmpxchgl %edx, (%ecx)
+; X86-RAO-INT-NEXT:    sete %cl
+; X86-RAO-INT-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-RAO-INT-NEXT:    testb $1, %cl
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    jne .LBB1_2
+; X86-RAO-INT-NEXT:    jmp .LBB1_1
+; X86-RAO-INT-NEXT:  .LBB1_2: # %atomicrmw.end
+; X86-RAO-INT-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT:    addl $16, %esp
+; X86-RAO-INT-NEXT:    popl %esi
+; X86-RAO-INT-NEXT:    retl
+;
+; X64-NO-RAOINT-LABEL: atomic_or32:
+; X64-NO-RAOINT:       # %bb.0:
+; X64-NO-RAOINT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-RAOINT-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    movl (%rdi), %eax
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:  .LBB1_1: # %atomicrmw.start
+; X64-NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    movl %eax, %edx
+; X64-NO-RAOINT-NEXT:    orl %esi, %edx
+; X64-NO-RAOINT-NEXT:    lock cmpxchgl %edx, (%rcx)
+; X64-NO-RAOINT-NEXT:    sete %cl
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    testb $1, %cl
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    jne .LBB1_2
+; X64-NO-RAOINT-NEXT:    jmp .LBB1_1
+; X64-NO-RAOINT-NEXT:  .LBB1_2: # %atomicrmw.end
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    retq
+;
+; X64-RAO-INT-LABEL: atomic_or32:
+; X64-RAO-INT:       # %bb.0:
+; X64-RAO-INT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-RAO-INT-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    movl (%rdi), %eax
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:  .LBB1_1: # %atomicrmw.start
+; X64-RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-RAO-INT-NEXT:    movl %eax, %edx
+; X64-RAO-INT-NEXT:    orl %esi, %edx
+; X64-RAO-INT-NEXT:    lock cmpxchgl %edx, (%rcx)
+; X64-RAO-INT-NEXT:    sete %cl
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    testb $1, %cl
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    jne .LBB1_2
+; X64-RAO-INT-NEXT:    jmp .LBB1_1
+; X64-RAO-INT-NEXT:  .LBB1_2: # %atomicrmw.end
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT:    retq
+  %1 = atomicrmw or i32* %p, i32 %val monotonic
+  %2 = atomicrmw or i32* %p, i32 %val acquire
+  %3 = atomicrmw or i32* %p, i32 %val release
+  %4 = atomicrmw or i32* %p, i32 %val acq_rel
+  %5 = atomicrmw or i32* %p, i32 %val seq_cst
+
+  %6 = atomicrmw or i32* %p, i32 %val seq_cst
+  ret i32 %6
+}
+
+define i32 @atomic_xor32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_xor32:
+; X86-NO-RAOINT:       # %bb.0:
+; X86-NO-RAOINT-NEXT:    pushl %esi
+; X86-NO-RAOINT-NEXT:    subl $16, %esp
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    movl (%eax), %eax
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:  .LBB2_1: # %atomicrmw.start
+; X86-NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl %eax, %edx
+; X86-NO-RAOINT-NEXT:    xorl %esi, %edx
+; X86-NO-RAOINT-NEXT:    lock cmpxchgl %edx, (%ecx)
+; X86-NO-RAOINT-NEXT:    sete %cl
+; X86-NO-RAOINT-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    testb $1, %cl
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    jne .LBB2_2
+; X86-NO-RAOINT-NEXT:    jmp .LBB2_1
+; X86-NO-RAOINT-NEXT:  .LBB2_2: # %atomicrmw.end
+; X86-NO-RAOINT-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    addl $16, %esp
+; X86-NO-RAOINT-NEXT:    popl %esi
+; X86-NO-RAOINT-NEXT:    retl
+;
+; X86-RAO-INT-LABEL: atomic_xor32:
+; X86-RAO-INT:       # %bb.0:
+; X86-RAO-INT-NEXT:    pushl %esi
+; X86-RAO-INT-NEXT:    subl $16, %esp
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    movl (%eax), %eax
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:  .LBB2_1: # %atomicrmw.start
+; X86-RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl %eax, %edx
+; X86-RAO-INT-NEXT:    xorl %esi, %edx
+; X86-RAO-INT-NEXT:    lock cmpxchgl %edx, (%ecx)
+; X86-RAO-INT-NEXT:    sete %cl
+; X86-RAO-INT-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-RAO-INT-NEXT:    testb $1, %cl
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    jne .LBB2_2
+; X86-RAO-INT-NEXT:    jmp .LBB2_1
+; X86-RAO-INT-NEXT:  .LBB2_2: # %atomicrmw.end
+; X86-RAO-INT-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT:    addl $16, %esp
+; X86-RAO-INT-NEXT:    popl %esi
+; X86-RAO-INT-NEXT:    retl
+;
+; X64-NO-RAOINT-LABEL: atomic_xor32:
+; X64-NO-RAOINT:       # %bb.0:
+; X64-NO-RAOINT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-RAOINT-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    movl (%rdi), %eax
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:  .LBB2_1: # %atomicrmw.start
+; X64-NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    movl %eax, %edx
+; X64-NO-RAOINT-NEXT:    xorl %esi, %edx
+; X64-NO-RAOINT-NEXT:    lock cmpxchgl %edx, (%rcx)
+; X64-NO-RAOINT-NEXT:    sete %cl
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    testb $1, %cl
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    jne .LBB2_2
+; X64-NO-RAOINT-NEXT:    jmp .LBB2_1
+; X64-NO-RAOINT-NEXT:  .LBB2_2: # %atomicrmw.end
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    retq
+;
+; X64-RAO-INT-LABEL: atomic_xor32:
+; X64-RAO-INT:       # %bb.0:
+; X64-RAO-INT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-RAO-INT-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    movl (%rdi), %eax
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:  .LBB2_1: # %atomicrmw.start
+; X64-RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-RAO-INT-NEXT:    movl %eax, %edx
+; X64-RAO-INT-NEXT:    xorl %esi, %edx
+; X64-RAO-INT-NEXT:    lock cmpxchgl %edx, (%rcx)
+; X64-RAO-INT-NEXT:    sete %cl
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    testb $1, %cl
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    jne .LBB2_2
+; X64-RAO-INT-NEXT:    jmp .LBB2_1
+; X64-RAO-INT-NEXT:  .LBB2_2: # %atomicrmw.end
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT:    retq
+  %1 = atomicrmw xor i32* %p, i32 %val monotonic
+  %2 = atomicrmw xor i32* %p, i32 %val acquire
+  %3 = atomicrmw xor i32* %p, i32 %val release
+  %4 = atomicrmw xor i32* %p, i32 %val acq_rel
+  %5 = atomicrmw xor i32* %p, i32 %val seq_cst
+
+  %6 = atomicrmw xor i32* %p, i32 %val seq_cst
+  ret i32 %6
+}
+
+define i32 @atomic_and32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_and32:
+; X86-NO-RAOINT:       # %bb.0:
+; X86-NO-RAOINT-NEXT:    pushl %esi
+; X86-NO-RAOINT-NEXT:    subl $16, %esp
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT:    movl (%eax), %eax
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:  .LBB3_1: # %atomicrmw.start
+; X86-NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    movl %eax, %edx
+; X86-NO-RAOINT-NEXT:    andl %esi, %edx
+; X86-NO-RAOINT-NEXT:    lock cmpxchgl %edx, (%ecx)
+; X86-NO-RAOINT-NEXT:    sete %cl
+; X86-NO-RAOINT-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    testb $1, %cl
+; X86-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT:    jne .LBB3_2
+; X86-NO-RAOINT-NEXT:    jmp .LBB3_1
+; X86-NO-RAOINT-NEXT:  .LBB3_2: # %atomicrmw.end
+; X86-NO-RAOINT-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT:    addl $16, %esp
+; X86-NO-RAOINT-NEXT:    popl %esi
+; X86-NO-RAOINT-NEXT:    retl
+;
+; X86-RAO-INT-LABEL: atomic_and32:
+; X86-RAO-INT:       # %bb.0:
+; X86-RAO-INT-NEXT:    pushl %esi
+; X86-RAO-INT-NEXT:    subl $16, %esp
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT:    movl (%eax), %eax
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:  .LBB3_1: # %atomicrmw.start
+; X86-RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-RAO-INT-NEXT:    movl %eax, %edx
+; X86-RAO-INT-NEXT:    andl %esi, %edx
+; X86-RAO-INT-NEXT:    lock cmpxchgl %edx, (%ecx)
+; X86-RAO-INT-NEXT:    sete %cl
+; X86-RAO-INT-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-RAO-INT-NEXT:    testb $1, %cl
+; X86-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT:    jne .LBB3_2
+; X86-RAO-INT-NEXT:    jmp .LBB3_1
+; X86-RAO-INT-NEXT:  .LBB3_2: # %atomicrmw.end
+; X86-RAO-INT-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT:    addl $16, %esp
+; X86-RAO-INT-NEXT:    popl %esi
+; X86-RAO-INT-NEXT:    retl
+;
+; X64-NO-RAOINT-LABEL: atomic_and32:
+; X64-NO-RAOINT:       # %bb.0:
+; X64-NO-RAOINT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-RAOINT-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT:    movl (%rdi), %eax
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:  .LBB3_1: # %atomicrmw.start
+; X64-NO-RAOINT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    movl %eax, %edx
+; X64-NO-RAOINT-NEXT:    andl %esi, %edx
+; X64-NO-RAOINT-NEXT:    lock cmpxchgl %edx, (%rcx)
+; X64-NO-RAOINT-NEXT:    sete %cl
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    testb $1, %cl
+; X64-NO-RAOINT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT:    jne .LBB3_2
+; X64-NO-RAOINT-NEXT:    jmp .LBB3_1
+; X64-NO-RAOINT-NEXT:  .LBB3_2: # %atomicrmw.end
+; X64-NO-RAOINT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT:    retq
+;
+; X64-RAO-INT-LABEL: atomic_and32:
+; X64-RAO-INT:       # %bb.0:
+; X64-RAO-INT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-RAO-INT-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT:    movl (%rdi), %eax
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:  .LBB3_1: # %atomicrmw.start
+; X64-RAO-INT-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-RAO-INT-NEXT:    movl %eax, %edx
+; X64-RAO-INT-NEXT:    andl %esi, %edx
+; X64-RAO-INT-NEXT:    lock cmpxchgl %edx, (%rcx)
+; X64-RAO-INT-NEXT:    sete %cl
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    testb $1, %cl
+; X64-RAO-INT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT:    jne .LBB3_2
+; X64-RAO-INT-NEXT:    jmp .LBB3_1
+; X64-RAO-INT-NEXT:  .LBB3_2: # %atomicrmw.end
+; X64-RAO-INT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT:    retq
+  %1 = atomicrmw and i32* %p, i32 %val monotonic
+  %2 = atomicrmw and i32* %p, i32 %val acquire
+  %3 = atomicrmw and i32* %p, i32 %val release
+  %4 = atomicrmw and i32* %p, i32 %val acq_rel
+  %5 = atomicrmw and i32* %p, i32 %val seq_cst
+
+  %6 = atomicrmw and i32* %p, i32 %val seq_cst
+  ret i32 %6
+}
+
+define i32 @atomic_sub32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_sub32:
+; X86-NO-RAOINT:       # %bb.0:
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT:    lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    negl %eax
+; X86-NO-RAOINT-NEXT:    lock xaddl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    retl
+;
+; X86-RAO-INT-LABEL: atomic_sub32:
+; X86-RAO-INT:       # %bb.0:
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT:    negl %eax
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    mfence
+; X86-RAO-INT-NEXT:    aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    lock xaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT:    retl
+;
+; X64-NO-RAOINT-LABEL: atomic_sub32:
+; X64-NO-RAOINT:       # %bb.0:
+; X64-NO-RAOINT-NEXT:    movl %esi, %eax
+; X64-NO-RAOINT-NEXT:    lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    negl %eax
+; X64-NO-RAOINT-NEXT:    lock xaddl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT:    retq
+;
+; X64-RAO-INT-LABEL: atomic_sub32:
+; X64-RAO-INT:       # %bb.0:
+; X64-RAO-INT-NEXT:    movl %esi, %eax
+; X64-RAO-INT-NEXT:    negl %eax
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    mfence
+; X64-RAO-INT-NEXT:    aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    lock xaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT:    retq
+  %1 = atomicrmw sub i32* %p, i32 %val monotonic
+  %2 = atomicrmw sub i32* %p, i32 %val acquire
+  %3 = atomicrmw sub i32* %p, i32 %val release
+  %4 = atomicrmw sub i32* %p, i32 %val acq_rel
+  %5 = atomicrmw sub i32* %p, i32 %val seq_cst
+
+  %6 = atomicrmw sub i32* %p, i32 %val seq_cst
+  ret i32 %6
+}
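
For context, a minimal C-level sketch (not part of the patch; function names are illustrative) of the pattern the tests above exercise: an atomic RMW whose result is discarded is the case the new lowering targets, while a used result keeps the existing xadd/cmpxchg path seen at the tail of each test.

/* Hedged sketch, assuming an x86-64 compile with -O2 -mraoint.  A discarded,
 * relaxed __atomic_fetch_add becomes `atomicrmw add ... monotonic` in IR,
 * which the lowering in this patch can select as a single aaddl; orderings
 * stronger than monotonic are expected to get an mfence in front. */
#include <stdint.h>

void bump_counter(uint32_t *counter) {                /* illustrative name */
  __atomic_fetch_add(counter, 1u, __ATOMIC_RELAXED);  /* result unused */
}

uint32_t set_flags(uint32_t *flags, uint32_t mask) {  /* illustrative name */
  /* Result used: RAO-INT does not return the old value, so this stays on
   * the existing cmpxchg-loop lowering, as in the tests above. */
  return __atomic_fetch_or(flags, mask, __ATOMIC_SEQ_CST);
}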
Index: llvm/lib/Target/X86/X86InstrRAOINT.td
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -0,0 +1,45 @@
+//===---- X86InstrRAOINT.td -------------------------------*- tablegen -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel RAO-INT
+// instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RAO-INT instructions
+
+def SDTRAOBinaryArith : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def X86rao_add  : SDNode<"X86ISD::RADD", SDTRAOBinaryArith,
+                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_or   : SDNode<"X86ISD::ROR",  SDTRAOBinaryArith,
+                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_xor  : SDNode<"X86ISD::RXOR", SDTRAOBinaryArith,
+                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_and  : SDNode<"X86ISD::RAND", SDTRAOBinaryArith,
+                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+multiclass RAOINT_BASE<string OpcodeStr> {
+  let Predicates = [HasRAOINT] in
+    def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                 !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
+                 [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
+               Sched<[WriteALURMW]>;
+
+  let Predicates = [HasRAOINT, In64BitMode] in
+    def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                 !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
+                 [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
+               Sched<[WriteALURMW]>, REX_W;
+}
+
+defm AADD : RAOINT_BASE<"add">, T8PS;
+defm AAND : RAOINT_BASE<"and">, T8PD;
+defm AOR  : RAOINT_BASE<"or" >, T8XD;
+defm AXOR : RAOINT_BASE<"xor">, T8XS;
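
The multiclass above expands to a 32-bit and a 64-bit memory-destination form per operation (AADD32mr/AADD64mr and so on), with the register source written first in AT&T syntax. A hedged inline-assembly sketch in C, using one of the mnemonics and an illustrative helper name, shows the operand order those asm strings produce; it assumes an assembler that already understands RAO-INT (for example an LLVM build with this patch).

/* Illustrative only: AT&T syntax, 32-bit form, source register then memory
 * destination, matching the "a<op>{l}  $src, $dst" variant above. */
#include <stdint.h>

static inline void rao_or32(uint32_t *p, uint32_t v) {  /* illustrative name */
  __asm__ __volatile__("aorl %1, %0" : "+m"(*p) : "r"(v) : "memory");
}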
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -980,6 +980,7 @@
 def HasPCONFIG   : Predicate<"Subtarget->hasPCONFIG()">;
 def HasENQCMD    : Predicate<"Subtarget->hasENQCMD()">;
 def HasKL        : Predicate<"Subtarget->hasKL()">;
+def HasRAOINT    : Predicate<"Subtarget->hasRAOINT()">;
 def HasWIDEKL    : Predicate<"Subtarget->hasWIDEKL()">;
 def HasHRESET    : Predicate<"Subtarget->hasHRESET()">;
 def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
@@ -3167,6 +3168,9 @@
 // AMX instructions
 include "X86InstrAMX.td"
 
+// RAO-INT instructions
+include "X86InstrRAOINT.td"
+
 // System instructions.
 include "X86InstrSystem.td"
 
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -793,6 +793,13 @@
     LBTC,
     LBTR,
 
+    /// RAO arithmetic instructions.
+    /// OUTCHAIN = RADD(INCHAIN, PTR, RHS)
+    RADD,
+    ROR,
+    RXOR,
+    RAND,
+
     // Load, scalar_to_vector, and zero extend.
     VZEXT_LOAD,
 
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31796,6 +31796,52 @@
     return N;
   }
 
+  // We can lower add/sub/or/xor/and into RAO-INT instructions when the result
+  // is unused.
+  // TODO: We can manually widen i8/i16 to i32 here to use RAO-INT instructions.
+  if (Subtarget.hasRAOINT() &&
+      (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) {
+    if (Opc == ISD::ATOMIC_LOAD_SUB) {
+      RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
+                           AN->getMemOperand());
+    }
+    unsigned NewOpc = 0;
+    switch (N->getOpcode()) {
+    case ISD::ATOMIC_LOAD_ADD:
+      NewOpc = X86ISD::RADD;
+      break;
+    case ISD::ATOMIC_LOAD_OR:
+      NewOpc = X86ISD::ROR;
+      break;
+    case ISD::ATOMIC_LOAD_XOR:
+      NewOpc = X86ISD::RXOR;
+      break;
+    case ISD::ATOMIC_LOAD_AND:
+      NewOpc = X86ISD::RAND;
+      break;
+    default:
+      llvm_unreachable("Unexpected ATOMIC_LOAD_ opcode");
+    }
+
+    // RAO-INT instructions are weakly ordered. We need to insert an MFENCE
+    // for orderings stronger than monotonic.
+    // FIXME: Do we just need LFENCE for acquire?
+    // FIXME: Do we need a trailing fence?
+    if (isStrongerThanMonotonic(AN->getSuccessOrdering())) {
+      assert(Subtarget.hasMFence() && "MFENCE is required");
+      Chain = DAG.getNode(X86ISD::MFENCE, DL, MVT::Other, Chain);
+    }
+
+    MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+    SDValue RAO = DAG.getMemIntrinsicNode(NewOpc, DL, DAG.getVTList(MVT::Other),
+                                          {Chain, LHS, RHS}, VT, MMO);
+
+    // NOTE: The getUNDEF gives a value for the otherwise unused result 0.
+    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+                       RAO);
+  }
+
   // Specialized lowering for the canonical form of an idemptotent atomicrmw.
   // The core idea here is that since the memory location isn't actually
   // changing, all we need is a lowering for the *ordering* impacts of the
@@ -33709,6 +33755,10 @@
   NODE_NAME_CASE(LBTS)
   NODE_NAME_CASE(LBTC)
   NODE_NAME_CASE(LBTR)
+  NODE_NAME_CASE(RADD)
+  NODE_NAME_CASE(ROR)
+  NODE_NAME_CASE(RXOR)
+  NODE_NAME_CASE(RAND)
   NODE_NAME_CASE(VZEXT_MOVL)
   NODE_NAME_CASE(VZEXT_LOAD)
   NODE_NAME_CASE(VEXTRACT_STORE)
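
Two details of the lowering above are worth spelling out: atomicrmw sub is folded into the RADD path by negating the operand, and because RAO-INT operations are weakly ordered, any ordering stronger than monotonic gets an MFENCE emitted in front. A hedged C-level mirror of the same two cases (illustrative names, not part of the patch):

#include <stdint.h>

void atomic_sub_relaxed(uint32_t *p, uint32_t v) {  /* illustrative name */
  /* Same rewrite the DAG code performs: sub becomes an add of 0 - v,
   * so a single aaddl can be selected. */
  __atomic_fetch_sub(p, v, __ATOMIC_RELAXED);       /* expected: aaddl */
}

void atomic_add_seq_cst(uint32_t *p, uint32_t v) {  /* illustrative name */
  __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST);       /* expected: mfence; aaddl */
}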
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -254,6 +254,9 @@
 def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                                       "Support AMX-BF16 instructions",
                                       [FeatureAMXTILE]>;
+def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
+                                     "Support RAO-INT instructions",
+                                     [FeatureSSE2]>;
 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                       "Invalidate Process-Context Identifier">;
 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
Index: llvm/lib/Support/X86TargetParser.cpp
===================================================================
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -581,6 +581,7 @@
 constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
 constexpr FeatureBitset ImpliedFeaturesHRESET = {};
 
+static constexpr FeatureBitset ImpliedFeaturesRAOINT = FeatureSSE2;
 static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
     FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
 // Key Locker Features
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1805,6 +1805,7 @@
   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
   bool HasLeaf7Subleaf1 =
       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Index: llvm/include/llvm/Support/X86TargetParser.def
===================================================================
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -200,6 +200,7 @@
 X86_FEATURE       (XSAVEOPT,        "xsaveopt")
 X86_FEATURE       (XSAVES,          "xsaves")
 X86_FEATURE       (HRESET,          "hreset")
+X86_FEATURE       (RAOINT,          "raoint")
 X86_FEATURE       (AVX512FP16,      "avx512fp16")
 X86_FEATURE       (AVXVNNI,         "avxvnni")
 // These features aren't really CPU features, but the frontend can set them.
Index: clang/test/Preprocessor/x86_target_features.c
===================================================================
--- clang/test/Preprocessor/x86_target_features.c
+++ clang/test/Preprocessor/x86_target_features.c
@@ -581,6 +581,14 @@
 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
 
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mraoint -x c -E -dM -o - %s | FileCheck  -check-prefix=RAOINT %s
+
+// RAOINT: #define __RAOINT__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mno-raoint -x c -E -dM -o - %s | FileCheck  -check-prefix=NO-RAOINT %s
+
+// NO-RAOINT-NOT: #define __RAOINT__ 1
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
 
 // CRC32: #define __CRC32__ 1
Index: clang/test/Driver/x86-target-features.c
===================================================================
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -305,6 +305,11 @@
 // AVX512FP16: "-target-feature" "+avx512fp16"
 // NO-AVX512FP16: "-target-feature" "-avx512fp16"
 
+// RUN: %clang --target=i386 -march=i386 -mraoint %s -### 2>&1 | FileCheck -check-prefix=RAOINT %s
+// RUN: %clang --target=i386 -march=i386 -mno-raoint %s -### 2>&1 | FileCheck -check-prefix=NO-RAOINT %s
+// RAOINT: "-target-feature" "+raoint"
+// NO-RAOINT: "-target-feature" "-raoint"
+
 // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
 // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
 // CRC32: "-target-feature" "+crc32"
Index: clang/lib/Headers/cpuid.h
===================================================================
--- clang/lib/Headers/cpuid.h
+++ clang/lib/Headers/cpuid.h
@@ -200,6 +200,7 @@
 #define bit_AMXINT8       0x02000000
 
 /* Features in %eax for leaf 7 sub-leaf 1 */
+#define bit_RAOINT        0x00000008
 #define bit_AVXVNNI       0x00000010
 #define bit_AVX512BF16    0x00000020
 #define bit_HRESET        0x00400000
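
The cpuid.h and Host.cpp changes expose the same CPUID bit (leaf 7, sub-leaf 1, EAX bit 3). A hedged sketch of consuming it, using the existing __get_cpuid_count helper from cpuid.h, the bit_RAOINT constant added above, and the __RAOINT__ macro that -mraoint predefines (the helper name is illustrative):

#include <cpuid.h>

static int have_raoint(void) {      /* illustrative name */
#if defined(__RAOINT__)
  return 1;                         /* translation unit built with -mraoint */
#else
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx))
    return 0;
  return (eax & bit_RAOINT) != 0;   /* EAX bit 3, as in Host.cpp above */
#endif
}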
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -135,6 +135,7 @@
   bool HasPTWRITE = false;
   bool HasINVPCID = false;
   bool HasENQCMD = false;
+  bool HasRAOINT = false;
   bool HasKL = false;      // For key locker
   bool HasWIDEKL = false; // For wide key locker
   bool HasHRESET = false;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -330,6 +330,8 @@
       HasAMXINT8 = true;
     } else if (Feature == "+amx-tile") {
       HasAMXTILE = true;
+    } else if (Feature == "+raoint") {
+      HasRAOINT = true;
     } else if (Feature == "+avxvnni") {
       HasAVXVNNI = true;
     } else if (Feature == "+serialize") {
@@ -774,6 +776,8 @@
     Builder.defineMacro("__AMXINT8__");
   if (HasAMXBF16)
     Builder.defineMacro("__AMXBF16__");
+  if (HasRAOINT)
+    Builder.defineMacro("__RAOINT__");
   if (HasAVXVNNI)
     Builder.defineMacro("__AVXVNNI__");
   if (HasSERIALIZE)
@@ -932,6 +936,7 @@
       .Case("prefetchwt1", true)
       .Case("prfchw", true)
       .Case("ptwrite", true)
+      .Case("raoint", true)
       .Case("rdpid", true)
       .Case("rdpru", true)
       .Case("rdrnd", true)
@@ -1028,6 +1033,7 @@
       .Case("prefetchwt1", HasPREFETCHWT1)
       .Case("prfchw", HasPRFCHW)
       .Case("ptwrite", HasPTWRITE)
+      .Case("raoint", HasRAOINT)
       .Case("rdpid", HasRDPID)
       .Case("rdpru", HasRDPRU)
       .Case("rdrnd", HasRDRND)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -4654,6 +4654,8 @@
 def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
 def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
 def mno_ptwrite : Flag<["-"], "mno-ptwrite">, Group<m_x86_Features_Group>;
+def mraoint : Flag<["-"], "mraoint">, Group<m_x86_Features_Group>;
+def mno_raoint : Flag<["-"], "mno-raoint">, Group<m_x86_Features_Group>;
 def mrdpid : Flag<["-"], "mrdpid">, Group<m_x86_Features_Group>;
 def mno_rdpid : Flag<["-"], "mno-rdpid">, Group<m_x86_Features_Group>;
 def mrdpru : Flag<["-"], "mrdpru">, Group<m_x86_Features_Group>;
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -550,6 +550,7 @@
 --------------------
 - Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk.
 - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
+- Add support for ``RAO-INT`` instructions.
 
 DWARF Support in Clang
 ----------------------