pengfei updated this revision to Diff 468020.
pengfei added a comment.
Add atomic operations lowering for RAO-INT instructions.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D135951/new/
https://reviews.llvm.org/D135951
Files:
clang/docs/ReleaseNotes.rst
clang/include/clang/Driver/Options.td
clang/lib/Basic/Targets/X86.cpp
clang/lib/Basic/Targets/X86.h
clang/lib/Headers/cpuid.h
clang/test/Driver/x86-target-features.c
clang/test/Preprocessor/x86_target_features.c
llvm/include/llvm/Support/X86TargetParser.def
llvm/lib/Support/Host.cpp
llvm/lib/Support/X86TargetParser.cpp
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrInfo.td
llvm/lib/Target/X86/X86InstrRAOINT.td
llvm/test/CodeGen/X86/atomic-instructions-32.ll
llvm/test/CodeGen/X86/atomic-instructions-64.ll
llvm/test/MC/Disassembler/X86/rao-int.txt
llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt
llvm/test/MC/X86/rao-int-att.s
llvm/test/MC/X86/rao-int-intel.s
llvm/test/MC/X86/x86-64-rao-int-att.s
llvm/test/MC/X86/x86-64-rao-int-intel.s
Index: llvm/test/MC/X86/x86-64-rao-int-intel.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/x86-64-rao-int-intel.s
@@ -0,0 +1,193 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: aadd qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ aadd qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK: aadd qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ aadd qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK: aadd qword ptr [rip], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ aadd qword ptr [rip], r9
+
+// CHECK: aadd qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ aadd qword ptr [2*rbp - 512], r9
+
+// CHECK: aadd qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ aadd qword ptr [rcx + 2032], r9
+
+// CHECK: aadd qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ aadd qword ptr [rdx - 2048], r9
+
+// CHECK: aadd dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aadd dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: aadd dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aadd dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: aadd dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x18]
+ aadd dword ptr [eax], ebx
+
+// CHECK: aadd dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aadd dword ptr [2*ebp - 512], ebx
+
+// CHECK: aadd dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aadd dword ptr [ecx + 2032], ebx
+
+// CHECK: aadd dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aadd dword ptr [edx - 2048], ebx
+
+// CHECK: aand qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ aand qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK: aand qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ aand qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK: aand qword ptr [rip], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ aand qword ptr [rip], r9
+
+// CHECK: aand qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ aand qword ptr [2*rbp - 512], r9
+
+// CHECK: aand qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ aand qword ptr [rcx + 2032], r9
+
+// CHECK: aand qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ aand qword ptr [rdx - 2048], r9
+
+// CHECK: aand dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aand dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: aand dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aand dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: aand dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x18]
+ aand dword ptr [eax], ebx
+
+// CHECK: aand dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aand dword ptr [2*ebp - 512], ebx
+
+// CHECK: aand dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aand dword ptr [ecx + 2032], ebx
+
+// CHECK: aand dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aand dword ptr [edx - 2048], ebx
+
+// CHECK: aor qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ aor qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK: aor qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ aor qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK: aor qword ptr [rip], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ aor qword ptr [rip], r9
+
+// CHECK: aor qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ aor qword ptr [2*rbp - 512], r9
+
+// CHECK: aor qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ aor qword ptr [rcx + 2032], r9
+
+// CHECK: aor qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ aor qword ptr [rdx - 2048], r9
+
+// CHECK: aor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: aor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: aor dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x18]
+ aor dword ptr [eax], ebx
+
+// CHECK: aor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aor dword ptr [2*ebp - 512], ebx
+
+// CHECK: aor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aor dword ptr [ecx + 2032], ebx
+
+// CHECK: aor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aor dword ptr [edx - 2048], ebx
+
+// CHECK: axor qword ptr [rbp + 8*r14 + 268435456], r9
+// CHECK: encoding: [0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ axor qword ptr [rbp + 8*r14 + 268435456], r9
+
+// CHECK: axor qword ptr [r8 + 4*rax + 291], r9
+// CHECK: encoding: [0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ axor qword ptr [r8 + 4*rax + 291], r9
+
+// CHECK: axor qword ptr [rip], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ axor qword ptr [rip], r9
+
+// CHECK: axor qword ptr [2*rbp - 512], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ axor qword ptr [2*rbp - 512], r9
+
+// CHECK: axor qword ptr [rcx + 2032], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ axor qword ptr [rcx + 2032], r9
+
+// CHECK: axor qword ptr [rdx - 2048], r9
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ axor qword ptr [rdx - 2048], r9
+
+// CHECK: axor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ axor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: axor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ axor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: axor dword ptr [eax], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x18]
+ axor dword ptr [eax], ebx
+
+// CHECK: axor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ axor dword ptr [2*ebp - 512], ebx
+
+// CHECK: axor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ axor dword ptr [ecx + 2032], ebx
+
+// CHECK: axor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ axor dword ptr [edx - 2048], ebx
Index: llvm/test/MC/X86/x86-64-rao-int-att.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/x86-64-rao-int-att.s
@@ -0,0 +1,193 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: aaddq %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ aaddq %r9, 268435456(%rbp,%r14,8)
+
+// CHECK: aaddq %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ aaddq %r9, 291(%r8,%rax,4)
+
+// CHECK: aaddq %r9, (%rip)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ aaddq %r9, (%rip)
+
+// CHECK: aaddq %r9, -512(,%rbp,2)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ aaddq %r9, -512(,%rbp,2)
+
+// CHECK: aaddq %r9, 2032(%rcx)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ aaddq %r9, 2032(%rcx)
+
+// CHECK: aaddq %r9, -2048(%rdx)
+// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ aaddq %r9, -2048(%rdx)
+
+// CHECK: aaddl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aaddl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: aaddl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aaddl %ebx, 291(%edi,%eax,4)
+
+// CHECK: aaddl %ebx, (%eax)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x18]
+ aaddl %ebx, (%eax)
+
+// CHECK: aaddl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aaddl %ebx, -512(,%ebp,2)
+
+// CHECK: aaddl %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aaddl %ebx, 2032(%ecx)
+
+// CHECK: aaddl %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aaddl %ebx, -2048(%edx)
+
+// CHECK: aandq %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ aandq %r9, 268435456(%rbp,%r14,8)
+
+// CHECK: aandq %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ aandq %r9, 291(%r8,%rax,4)
+
+// CHECK: aandq %r9, (%rip)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ aandq %r9, (%rip)
+
+// CHECK: aandq %r9, -512(,%rbp,2)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ aandq %r9, -512(,%rbp,2)
+
+// CHECK: aandq %r9, 2032(%rcx)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ aandq %r9, 2032(%rcx)
+
+// CHECK: aandq %r9, -2048(%rdx)
+// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ aandq %r9, -2048(%rdx)
+
+// CHECK: aandl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aandl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: aandl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aandl %ebx, 291(%edi,%eax,4)
+
+// CHECK: aandl %ebx, (%eax)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x18]
+ aandl %ebx, (%eax)
+
+// CHECK: aandl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aandl %ebx, -512(,%ebp,2)
+
+// CHECK: aandl %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aandl %ebx, 2032(%ecx)
+
+// CHECK: aandl %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aandl %ebx, -2048(%edx)
+
+// CHECK: aorq %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ aorq %r9, 268435456(%rbp,%r14,8)
+
+// CHECK: aorq %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ aorq %r9, 291(%r8,%rax,4)
+
+// CHECK: aorq %r9, (%rip)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ aorq %r9, (%rip)
+
+// CHECK: aorq %r9, -512(,%rbp,2)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ aorq %r9, -512(,%rbp,2)
+
+// CHECK: aorq %r9, 2032(%rcx)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ aorq %r9, 2032(%rcx)
+
+// CHECK: aorq %r9, -2048(%rdx)
+// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ aorq %r9, -2048(%rdx)
+
+// CHECK: aorl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aorl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: aorl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aorl %ebx, 291(%edi,%eax,4)
+
+// CHECK: aorl %ebx, (%eax)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x18]
+ aorl %ebx, (%eax)
+
+// CHECK: aorl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aorl %ebx, -512(,%ebp,2)
+
+// CHECK: aorl %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aorl %ebx, 2032(%ecx)
+
+// CHECK: aorl %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aorl %ebx, -2048(%edx)
+
+// CHECK: axorq %r9, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ axorq %r9, 268435456(%rbp,%r14,8)
+
+// CHECK: axorq %r9, 291(%r8,%rax,4)
+// CHECK: encoding: [0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00]
+ axorq %r9, 291(%r8,%rax,4)
+
+// CHECK: axorq %r9, (%rip)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00]
+ axorq %r9, (%rip)
+
+// CHECK: axorq %r9, -512(,%rbp,2)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff]
+ axorq %r9, -512(,%rbp,2)
+
+// CHECK: axorq %r9, 2032(%rcx)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00]
+ axorq %r9, 2032(%rcx)
+
+// CHECK: axorq %r9, -2048(%rdx)
+// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff]
+ axorq %r9, -2048(%rdx)
+
+// CHECK: axorl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ axorl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: axorl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ axorl %ebx, 291(%edi,%eax,4)
+
+// CHECK: axorl %ebx, (%eax)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x18]
+ axorl %ebx, (%eax)
+
+// CHECK: axorl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ axorl %ebx, -512(,%ebp,2)
+
+// CHECK: axorl %ebx, 2032(%ecx)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ axorl %ebx, 2032(%ecx)
+
+// CHECK: axorl %ebx, -2048(%edx)
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ axorl %ebx, -2048(%edx)
Index: llvm/test/MC/X86/rao-int-intel.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/rao-int-intel.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: aadd dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aadd dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: aadd dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aadd dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: aadd dword ptr [eax], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x18]
+ aadd dword ptr [eax], ebx
+
+// CHECK: aadd dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aadd dword ptr [2*ebp - 512], ebx
+
+// CHECK: aadd dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aadd dword ptr [ecx + 2032], ebx
+
+// CHECK: aadd dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aadd dword ptr [edx - 2048], ebx
+
+// CHECK: aand dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aand dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: aand dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aand dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: aand dword ptr [eax], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x18]
+ aand dword ptr [eax], ebx
+
+// CHECK: aand dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aand dword ptr [2*ebp - 512], ebx
+
+// CHECK: aand dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aand dword ptr [ecx + 2032], ebx
+
+// CHECK: aand dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aand dword ptr [edx - 2048], ebx
+
+// CHECK: aor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: aor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: aor dword ptr [eax], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x18]
+ aor dword ptr [eax], ebx
+
+// CHECK: aor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aor dword ptr [2*ebp - 512], ebx
+
+// CHECK: aor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aor dword ptr [ecx + 2032], ebx
+
+// CHECK: aor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aor dword ptr [edx - 2048], ebx
+
+// CHECK: axor dword ptr [esp + 8*esi + 268435456], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ axor dword ptr [esp + 8*esi + 268435456], ebx
+
+// CHECK: axor dword ptr [edi + 4*eax + 291], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ axor dword ptr [edi + 4*eax + 291], ebx
+
+// CHECK: axor dword ptr [eax], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x18]
+ axor dword ptr [eax], ebx
+
+// CHECK: axor dword ptr [2*ebp - 512], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ axor dword ptr [2*ebp - 512], ebx
+
+// CHECK: axor dword ptr [ecx + 2032], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ axor dword ptr [ecx + 2032], ebx
+
+// CHECK: axor dword ptr [edx - 2048], ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ axor dword ptr [edx - 2048], ebx
Index: llvm/test/MC/X86/rao-int-att.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/rao-int-att.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: aaddl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aaddl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: aaddl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aaddl %ebx, 291(%edi,%eax,4)
+
+// CHECK: aaddl %ebx, (%eax)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x18]
+ aaddl %ebx, (%eax)
+
+// CHECK: aaddl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aaddl %ebx, -512(,%ebp,2)
+
+// CHECK: aaddl %ebx, 2032(%ecx)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aaddl %ebx, 2032(%ecx)
+
+// CHECK: aaddl %ebx, -2048(%edx)
+// CHECK: encoding: [0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aaddl %ebx, -2048(%edx)
+
+// CHECK: aandl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aandl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: aandl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aandl %ebx, 291(%edi,%eax,4)
+
+// CHECK: aandl %ebx, (%eax)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x18]
+ aandl %ebx, (%eax)
+
+// CHECK: aandl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aandl %ebx, -512(,%ebp,2)
+
+// CHECK: aandl %ebx, 2032(%ecx)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aandl %ebx, 2032(%ecx)
+
+// CHECK: aandl %ebx, -2048(%edx)
+// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aandl %ebx, -2048(%edx)
+
+// CHECK: aorl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ aorl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: aorl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ aorl %ebx, 291(%edi,%eax,4)
+
+// CHECK: aorl %ebx, (%eax)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x18]
+ aorl %ebx, (%eax)
+
+// CHECK: aorl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ aorl %ebx, -512(,%ebp,2)
+
+// CHECK: aorl %ebx, 2032(%ecx)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ aorl %ebx, 2032(%ecx)
+
+// CHECK: aorl %ebx, -2048(%edx)
+// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ aorl %ebx, -2048(%edx)
+
+// CHECK: axorl %ebx, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ axorl %ebx, 268435456(%esp,%esi,8)
+
+// CHECK: axorl %ebx, 291(%edi,%eax,4)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00]
+ axorl %ebx, 291(%edi,%eax,4)
+
+// CHECK: axorl %ebx, (%eax)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x18]
+ axorl %ebx, (%eax)
+
+// CHECK: axorl %ebx, -512(,%ebp,2)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff]
+ axorl %ebx, -512(,%ebp,2)
+
+// CHECK: axorl %ebx, 2032(%ecx)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00]
+ axorl %ebx, 2032(%ecx)
+
+// CHECK: axorl %ebx, -2048(%edx)
+// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff]
+ axorl %ebx, -2048(%edx)
Index: llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt
@@ -0,0 +1,194 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: aaddq %r9, 268435456(%rbp,%r14,8)
+# INTEL: aadd qword ptr [rbp + 8*r14 + 268435456], r9
+0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: aaddq %r9, 291(%r8,%rax,4)
+# INTEL: aadd qword ptr [r8 + 4*rax + 291], r9
+0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: aaddq %r9, (%rip)
+# INTEL: aadd qword ptr [rip], r9
+0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: aaddq %r9, -512(,%rbp,2)
+# INTEL: aadd qword ptr [2*rbp - 512], r9
+0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aaddq %r9, 2032(%rcx)
+# INTEL: aadd qword ptr [rcx + 2032], r9
+0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT: aaddq %r9, -2048(%rdx)
+# INTEL: aadd qword ptr [rdx - 2048], r9
+0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT: aaddl %ebx, 268435456(%esp,%esi,8)
+# INTEL: aadd dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: aaddl %ebx, 291(%edi,%eax,4)
+# INTEL: aadd dword ptr [edi + 4*eax + 291], ebx
+0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: aaddl %ebx, (%eax)
+# INTEL: aadd dword ptr [eax], ebx
+0x67,0x0f,0x38,0xfc,0x18
+
+# ATT: aaddl %ebx, -512(,%ebp,2)
+# INTEL: aadd dword ptr [2*ebp - 512], ebx
+0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aaddl %ebx, 2032(%ecx)
+# INTEL: aadd dword ptr [ecx + 2032], ebx
+0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: aaddl %ebx, -2048(%edx)
+# INTEL: aadd dword ptr [edx - 2048], ebx
+0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT: aandq %r9, 268435456(%rbp,%r14,8)
+# INTEL: aand qword ptr [rbp + 8*r14 + 268435456], r9
+0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: aandq %r9, 291(%r8,%rax,4)
+# INTEL: aand qword ptr [r8 + 4*rax + 291], r9
+0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: aandq %r9, (%rip)
+# INTEL: aand qword ptr [rip], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: aandq %r9, -512(,%rbp,2)
+# INTEL: aand qword ptr [2*rbp - 512], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aandq %r9, 2032(%rcx)
+# INTEL: aand qword ptr [rcx + 2032], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT: aandq %r9, -2048(%rdx)
+# INTEL: aand qword ptr [rdx - 2048], r9
+0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT: aandl %ebx, 268435456(%esp,%esi,8)
+# INTEL: aand dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: aandl %ebx, 291(%edi,%eax,4)
+# INTEL: aand dword ptr [edi + 4*eax + 291], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: aandl %ebx, (%eax)
+# INTEL: aand dword ptr [eax], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x18
+
+# ATT: aandl %ebx, -512(,%ebp,2)
+# INTEL: aand dword ptr [2*ebp - 512], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aandl %ebx, 2032(%ecx)
+# INTEL: aand dword ptr [ecx + 2032], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: aandl %ebx, -2048(%edx)
+# INTEL: aand dword ptr [edx - 2048], ebx
+0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT: aorq %r9, 268435456(%rbp,%r14,8)
+# INTEL: aor qword ptr [rbp + 8*r14 + 268435456], r9
+0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: aorq %r9, 291(%r8,%rax,4)
+# INTEL: aor qword ptr [r8 + 4*rax + 291], r9
+0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: aorq %r9, (%rip)
+# INTEL: aor qword ptr [rip], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: aorq %r9, -512(,%rbp,2)
+# INTEL: aor qword ptr [2*rbp - 512], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aorq %r9, 2032(%rcx)
+# INTEL: aor qword ptr [rcx + 2032], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT: aorq %r9, -2048(%rdx)
+# INTEL: aor qword ptr [rdx - 2048], r9
+0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT: aorl %ebx, 268435456(%esp,%esi,8)
+# INTEL: aor dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: aorl %ebx, 291(%edi,%eax,4)
+# INTEL: aor dword ptr [edi + 4*eax + 291], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: aorl %ebx, (%eax)
+# INTEL: aor dword ptr [eax], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x18
+
+# ATT: aorl %ebx, -512(,%ebp,2)
+# INTEL: aor dword ptr [2*ebp - 512], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aorl %ebx, 2032(%ecx)
+# INTEL: aor dword ptr [ecx + 2032], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: aorl %ebx, -2048(%edx)
+# INTEL: aor dword ptr [edx - 2048], ebx
+0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT: axorq %r9, 268435456(%rbp,%r14,8)
+# INTEL: axor qword ptr [rbp + 8*r14 + 268435456], r9
+0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: axorq %r9, 291(%r8,%rax,4)
+# INTEL: axor qword ptr [r8 + 4*rax + 291], r9
+0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: axorq %r9, (%rip)
+# INTEL: axor qword ptr [rip], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: axorq %r9, -512(,%rbp,2)
+# INTEL: axor qword ptr [2*rbp - 512], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: axorq %r9, 2032(%rcx)
+# INTEL: axor qword ptr [rcx + 2032], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00
+
+# ATT: axorq %r9, -2048(%rdx)
+# INTEL: axor qword ptr [rdx - 2048], r9
+0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff
+
+# ATT: axorl %ebx, 268435456(%esp,%esi,8)
+# INTEL: axor dword ptr [esp + 8*esi + 268435456], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: axorl %ebx, 291(%edi,%eax,4)
+# INTEL: axor dword ptr [edi + 4*eax + 291], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: axorl %ebx, (%eax)
+# INTEL: axor dword ptr [eax], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x18
+
+# ATT: axorl %ebx, -512(,%ebp,2)
+# INTEL: axor dword ptr [2*ebp - 512], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: axorl %ebx, 2032(%ecx)
+# INTEL: axor dword ptr [ecx + 2032], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: axorl %ebx, -2048(%edx)
+# INTEL: axor dword ptr [edx - 2048], ebx
+0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
Index: llvm/test/MC/Disassembler/X86/rao-int.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/rao-int.txt
@@ -0,0 +1,98 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: aaddl %ebx, 268435456(%esp,%esi,8)
+# INTEL: aadd dword ptr [esp + 8*esi + 268435456], ebx
+0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: aaddl %ebx, 291(%edi,%eax,4)
+# INTEL: aadd dword ptr [edi + 4*eax + 291], ebx
+0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: aaddl %ebx, (%eax)
+# INTEL: aadd dword ptr [eax], ebx
+0x0f,0x38,0xfc,0x18
+
+# ATT: aaddl %ebx, -512(,%ebp,2)
+# INTEL: aadd dword ptr [2*ebp - 512], ebx
+0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aaddl %ebx, 2032(%ecx)
+# INTEL: aadd dword ptr [ecx + 2032], ebx
+0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: aaddl %ebx, -2048(%edx)
+# INTEL: aadd dword ptr [edx - 2048], ebx
+0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT: aandl %ebx, 268435456(%esp,%esi,8)
+# INTEL: aand dword ptr [esp + 8*esi + 268435456], ebx
+0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: aandl %ebx, 291(%edi,%eax,4)
+# INTEL: aand dword ptr [edi + 4*eax + 291], ebx
+0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: aandl %ebx, (%eax)
+# INTEL: aand dword ptr [eax], ebx
+0x66,0x0f,0x38,0xfc,0x18
+
+# ATT: aandl %ebx, -512(,%ebp,2)
+# INTEL: aand dword ptr [2*ebp - 512], ebx
+0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aandl %ebx, 2032(%ecx)
+# INTEL: aand dword ptr [ecx + 2032], ebx
+0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: aandl %ebx, -2048(%edx)
+# INTEL: aand dword ptr [edx - 2048], ebx
+0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT: aorl %ebx, 268435456(%esp,%esi,8)
+# INTEL: aor dword ptr [esp + 8*esi + 268435456], ebx
+0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: aorl %ebx, 291(%edi,%eax,4)
+# INTEL: aor dword ptr [edi + 4*eax + 291], ebx
+0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: aorl %ebx, (%eax)
+# INTEL: aor dword ptr [eax], ebx
+0xf2,0x0f,0x38,0xfc,0x18
+
+# ATT: aorl %ebx, -512(,%ebp,2)
+# INTEL: aor dword ptr [2*ebp - 512], ebx
+0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: aorl %ebx, 2032(%ecx)
+# INTEL: aor dword ptr [ecx + 2032], ebx
+0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: aorl %ebx, -2048(%edx)
+# INTEL: aor dword ptr [edx - 2048], ebx
+0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
+
+# ATT: axorl %ebx, 268435456(%esp,%esi,8)
+# INTEL: axor dword ptr [esp + 8*esi + 268435456], ebx
+0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: axorl %ebx, 291(%edi,%eax,4)
+# INTEL: axor dword ptr [edi + 4*eax + 291], ebx
+0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: axorl %ebx, (%eax)
+# INTEL: axor dword ptr [eax], ebx
+0xf3,0x0f,0x38,0xfc,0x18
+
+# ATT: axorl %ebx, -512(,%ebp,2)
+# INTEL: axor dword ptr [2*ebp - 512], ebx
+0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: axorl %ebx, 2032(%ecx)
+# INTEL: axor dword ptr [ecx + 2032], ebx
+0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00
+
+# ATT: axorl %ebx, -2048(%edx)
+# INTEL: axor dword ptr [edx - 2048], ebx
+0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff
Index: llvm/test/CodeGen/X86/atomic-instructions-64.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/atomic-instructions-64.ll
@@ -0,0 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=RAO-INT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=NO-RAOINT
+
+define i64 @atomic_add64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_add64:
+; RAO-INT: # %bb.0:
+; RAO-INT-NEXT: movq %rsi, %rax
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: lock xaddq %rax, (%rdi)
+; RAO-INT-NEXT: retq
+;
+; NO-RAOINT-LABEL: atomic_add64:
+; NO-RAOINT: # %bb.0:
+; NO-RAOINT-NEXT: movq %rsi, %rax
+; NO-RAOINT-NEXT: lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock addq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock xaddq %rax, (%rdi)
+; NO-RAOINT-NEXT: retq
+ %1 = atomicrmw add i64* %p, i64 %val monotonic
+ %2 = atomicrmw add i64* %p, i64 %val acquire
+ %3 = atomicrmw add i64* %p, i64 %val release
+ %4 = atomicrmw add i64* %p, i64 %val acq_rel
+ %5 = atomicrmw add i64* %p, i64 %val seq_cst
+
+ %6 = atomicrmw add i64* %p, i64 %val seq_cst
+ ret i64 %6
+}
+
+define i64 @atomic_or64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_or64:
+; RAO-INT: # %bb.0:
+; RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: aorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aorq %rsi, (%rdi)
+; RAO-INT-NEXT: movq (%rdi), %rax
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start
+; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; RAO-INT-NEXT: movq %rax, %rdx
+; RAO-INT-NEXT: orq %rsi, %rdx
+; RAO-INT-NEXT: lock cmpxchgq %rdx, (%rcx)
+; RAO-INT-NEXT: sete %cl
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: testb $1, %cl
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: jne .LBB1_2
+; RAO-INT-NEXT: jmp .LBB1_1
+; RAO-INT-NEXT: .LBB1_2: # %atomicrmw.end
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT: retq
+;
+; NO-RAOINT-LABEL: atomic_or64:
+; NO-RAOINT: # %bb.0:
+; NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock orq %rsi, (%rdi)
+; NO-RAOINT-NEXT: movq (%rdi), %rax
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start
+; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; NO-RAOINT-NEXT: movq %rax, %rdx
+; NO-RAOINT-NEXT: orq %rsi, %rdx
+; NO-RAOINT-NEXT: lock cmpxchgq %rdx, (%rcx)
+; NO-RAOINT-NEXT: sete %cl
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: testb $1, %cl
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: jne .LBB1_2
+; NO-RAOINT-NEXT: jmp .LBB1_1
+; NO-RAOINT-NEXT: .LBB1_2: # %atomicrmw.end
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT: retq
+ %1 = atomicrmw or i64* %p, i64 %val monotonic
+ %2 = atomicrmw or i64* %p, i64 %val acquire
+ %3 = atomicrmw or i64* %p, i64 %val release
+ %4 = atomicrmw or i64* %p, i64 %val acq_rel
+ %5 = atomicrmw or i64* %p, i64 %val seq_cst
+
+ %6 = atomicrmw or i64* %p, i64 %val seq_cst
+ ret i64 %6
+}
+
+define i64 @atomic_xor64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_xor64:
+; RAO-INT: # %bb.0:
+; RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: axorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: axorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: axorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: axorq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: axorq %rsi, (%rdi)
+; RAO-INT-NEXT: movq (%rdi), %rax
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start
+; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; RAO-INT-NEXT: movq %rax, %rdx
+; RAO-INT-NEXT: xorq %rsi, %rdx
+; RAO-INT-NEXT: lock cmpxchgq %rdx, (%rcx)
+; RAO-INT-NEXT: sete %cl
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: testb $1, %cl
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: jne .LBB2_2
+; RAO-INT-NEXT: jmp .LBB2_1
+; RAO-INT-NEXT: .LBB2_2: # %atomicrmw.end
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT: retq
+;
+; NO-RAOINT-LABEL: atomic_xor64:
+; NO-RAOINT: # %bb.0:
+; NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi)
+; NO-RAOINT-NEXT: movq (%rdi), %rax
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start
+; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; NO-RAOINT-NEXT: movq %rax, %rdx
+; NO-RAOINT-NEXT: xorq %rsi, %rdx
+; NO-RAOINT-NEXT: lock cmpxchgq %rdx, (%rcx)
+; NO-RAOINT-NEXT: sete %cl
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: testb $1, %cl
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: jne .LBB2_2
+; NO-RAOINT-NEXT: jmp .LBB2_1
+; NO-RAOINT-NEXT: .LBB2_2: # %atomicrmw.end
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT: retq
+ %1 = atomicrmw xor i64* %p, i64 %val monotonic
+ %2 = atomicrmw xor i64* %p, i64 %val acquire
+ %3 = atomicrmw xor i64* %p, i64 %val release
+ %4 = atomicrmw xor i64* %p, i64 %val acq_rel
+ %5 = atomicrmw xor i64* %p, i64 %val seq_cst
+
+ %6 = atomicrmw xor i64* %p, i64 %val seq_cst
+ ret i64 %6
+}
+
+define i64 @atomic_and64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_and64:
+; RAO-INT: # %bb.0:
+; RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: aandq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aandq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aandq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aandq %rsi, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aandq %rsi, (%rdi)
+; RAO-INT-NEXT: movq (%rdi), %rax
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start
+; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; RAO-INT-NEXT: movq %rax, %rdx
+; RAO-INT-NEXT: andq %rsi, %rdx
+; RAO-INT-NEXT: lock cmpxchgq %rdx, (%rcx)
+; RAO-INT-NEXT: sete %cl
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: testb $1, %cl
+; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; RAO-INT-NEXT: jne .LBB3_2
+; RAO-INT-NEXT: jmp .LBB3_1
+; RAO-INT-NEXT: .LBB3_2: # %atomicrmw.end
+; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; RAO-INT-NEXT: retq
+;
+; NO-RAOINT-LABEL: atomic_and64:
+; NO-RAOINT: # %bb.0:
+; NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT: lock andq %rsi, (%rdi)
+; NO-RAOINT-NEXT: movq (%rdi), %rax
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start
+; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; NO-RAOINT-NEXT: movq %rax, %rdx
+; NO-RAOINT-NEXT: andq %rsi, %rdx
+; NO-RAOINT-NEXT: lock cmpxchgq %rdx, (%rcx)
+; NO-RAOINT-NEXT: sete %cl
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: testb $1, %cl
+; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-RAOINT-NEXT: jne .LBB3_2
+; NO-RAOINT-NEXT: jmp .LBB3_1
+; NO-RAOINT-NEXT: .LBB3_2: # %atomicrmw.end
+; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NO-RAOINT-NEXT: retq
+ %1 = atomicrmw and i64* %p, i64 %val monotonic
+ %2 = atomicrmw and i64* %p, i64 %val acquire
+ %3 = atomicrmw and i64* %p, i64 %val release
+ %4 = atomicrmw and i64* %p, i64 %val acq_rel
+ %5 = atomicrmw and i64* %p, i64 %val seq_cst
+
+ %6 = atomicrmw and i64* %p, i64 %val seq_cst
+ ret i64 %6
+}
+
+define i64 @atomic_sub64(i64* nocapture %p, i64 %val) nounwind ssp {
+; RAO-INT-LABEL: atomic_sub64:
+; RAO-INT: # %bb.0:
+; RAO-INT-NEXT: movq %rsi, %rax
+; RAO-INT-NEXT: negq %rax
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: mfence
+; RAO-INT-NEXT: aaddq %rax, (%rdi)
+; RAO-INT-NEXT: lock xaddq %rax, (%rdi)
+; RAO-INT-NEXT: retq
+;
+; NO-RAOINT-LABEL: atomic_sub64:
+; NO-RAOINT: # %bb.0:
+; NO-RAOINT-NEXT: movq %rsi, %rax
+; NO-RAOINT-NEXT: lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT: lock subq %rax, (%rdi)
+; NO-RAOINT-NEXT: negq %rax
+; NO-RAOINT-NEXT: lock xaddq %rax, (%rdi)
+; NO-RAOINT-NEXT: retq
+ %1 = atomicrmw sub i64* %p, i64 %val monotonic
+ %2 = atomicrmw sub i64* %p, i64 %val acquire
+ %3 = atomicrmw sub i64* %p, i64 %val release
+ %4 = atomicrmw sub i64* %p, i64 %val acq_rel
+ %5 = atomicrmw sub i64* %p, i64 %val seq_cst
+
+ %6 = atomicrmw sub i64* %p, i64 %val seq_cst
+ ret i64 %6
+}
Index: llvm/test/CodeGen/X86/atomic-instructions-32.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/atomic-instructions-32.ll
@@ -0,0 +1,575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86-NO-RAOINT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X86-RAO-INT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64-NO-RAOINT
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X64-RAO-INT
+
+define i32 @atomic_add32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_add32:
+; X86-NO-RAOINT: # %bb.0:
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock xaddl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: retl
+;
+; X86-RAO-INT-LABEL: atomic_add32:
+; X86-RAO-INT: # %bb.0:
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: lock xaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: retl
+;
+; X64-NO-RAOINT-LABEL: atomic_add32:
+; X64-NO-RAOINT: # %bb.0:
+; X64-NO-RAOINT-NEXT: movl %esi, %eax
+; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock xaddl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: retq
+;
+; X64-RAO-INT-LABEL: atomic_add32:
+; X64-RAO-INT: # %bb.0:
+; X64-RAO-INT-NEXT: movl %esi, %eax
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: lock xaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: retq
+ %1 = atomicrmw add i32* %p, i32 %val monotonic
+ %2 = atomicrmw add i32* %p, i32 %val acquire
+ %3 = atomicrmw add i32* %p, i32 %val release
+ %4 = atomicrmw add i32* %p, i32 %val acq_rel
+ %5 = atomicrmw add i32* %p, i32 %val seq_cst
+
+ %6 = atomicrmw add i32* %p, i32 %val seq_cst
+ ret i32 %6
+}
+
+define i32 @atomic_or32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_or32:
+; X86-NO-RAOINT: # %bb.0:
+; X86-NO-RAOINT-NEXT: pushl %esi
+; X86-NO-RAOINT-NEXT: subl $16, %esp
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: movl (%eax), %eax
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl %eax, %edx
+; X86-NO-RAOINT-NEXT: orl %esi, %edx
+; X86-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NO-RAOINT-NEXT: sete %cl
+; X86-NO-RAOINT-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: testb $1, %cl
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: jne .LBB1_2
+; X86-NO-RAOINT-NEXT: jmp .LBB1_1
+; X86-NO-RAOINT-NEXT: .LBB1_2: # %atomicrmw.end
+; X86-NO-RAOINT-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT: addl $16, %esp
+; X86-NO-RAOINT-NEXT: popl %esi
+; X86-NO-RAOINT-NEXT: retl
+;
+; X86-RAO-INT-LABEL: atomic_or32:
+; X86-RAO-INT: # %bb.0:
+; X86-RAO-INT-NEXT: pushl %esi
+; X86-RAO-INT-NEXT: subl $16, %esp
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: movl (%eax), %eax
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-RAO-INT-NEXT: movl %eax, %edx
+; X86-RAO-INT-NEXT: orl %esi, %edx
+; X86-RAO-INT-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-RAO-INT-NEXT: sete %cl
+; X86-RAO-INT-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-RAO-INT-NEXT: testb $1, %cl
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: jne .LBB1_2
+; X86-RAO-INT-NEXT: jmp .LBB1_1
+; X86-RAO-INT-NEXT: .LBB1_2: # %atomicrmw.end
+; X86-RAO-INT-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT: addl $16, %esp
+; X86-RAO-INT-NEXT: popl %esi
+; X86-RAO-INT-NEXT: retl
+;
+; X64-NO-RAOINT-LABEL: atomic_or32:
+; X64-NO-RAOINT: # %bb.0:
+; X64-NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-RAOINT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: movl (%rdi), %eax
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start
+; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-NO-RAOINT-NEXT: movl %eax, %edx
+; X64-NO-RAOINT-NEXT: orl %esi, %edx
+; X64-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%rcx)
+; X64-NO-RAOINT-NEXT: sete %cl
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: testb $1, %cl
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: jne .LBB1_2
+; X64-NO-RAOINT-NEXT: jmp .LBB1_1
+; X64-NO-RAOINT-NEXT: .LBB1_2: # %atomicrmw.end
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT: retq
+;
+; X64-RAO-INT-LABEL: atomic_or32:
+; X64-RAO-INT: # %bb.0:
+; X64-RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-RAO-INT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: movl (%rdi), %eax
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start
+; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-RAO-INT-NEXT: movl %eax, %edx
+; X64-RAO-INT-NEXT: orl %esi, %edx
+; X64-RAO-INT-NEXT: lock cmpxchgl %edx, (%rcx)
+; X64-RAO-INT-NEXT: sete %cl
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: testb $1, %cl
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: jne .LBB1_2
+; X64-RAO-INT-NEXT: jmp .LBB1_1
+; X64-RAO-INT-NEXT: .LBB1_2: # %atomicrmw.end
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT: retq
+ %1 = atomicrmw or i32* %p, i32 %val monotonic
+ %2 = atomicrmw or i32* %p, i32 %val acquire
+ %3 = atomicrmw or i32* %p, i32 %val release
+ %4 = atomicrmw or i32* %p, i32 %val acq_rel
+ %5 = atomicrmw or i32* %p, i32 %val seq_cst
+
+ %6 = atomicrmw or i32* %p, i32 %val seq_cst
+ ret i32 %6
+}
+
+define i32 @atomic_xor32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_xor32:
+; X86-NO-RAOINT: # %bb.0:
+; X86-NO-RAOINT-NEXT: pushl %esi
+; X86-NO-RAOINT-NEXT: subl $16, %esp
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: movl (%eax), %eax
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl %eax, %edx
+; X86-NO-RAOINT-NEXT: xorl %esi, %edx
+; X86-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NO-RAOINT-NEXT: sete %cl
+; X86-NO-RAOINT-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: testb $1, %cl
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: jne .LBB2_2
+; X86-NO-RAOINT-NEXT: jmp .LBB2_1
+; X86-NO-RAOINT-NEXT: .LBB2_2: # %atomicrmw.end
+; X86-NO-RAOINT-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT: addl $16, %esp
+; X86-NO-RAOINT-NEXT: popl %esi
+; X86-NO-RAOINT-NEXT: retl
+;
+; X86-RAO-INT-LABEL: atomic_xor32:
+; X86-RAO-INT: # %bb.0:
+; X86-RAO-INT-NEXT: pushl %esi
+; X86-RAO-INT-NEXT: subl $16, %esp
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: axorl %ecx, (%eax)
+; X86-RAO-INT-NEXT: movl (%eax), %eax
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-RAO-INT-NEXT: movl %eax, %edx
+; X86-RAO-INT-NEXT: xorl %esi, %edx
+; X86-RAO-INT-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-RAO-INT-NEXT: sete %cl
+; X86-RAO-INT-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-RAO-INT-NEXT: testb $1, %cl
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: jne .LBB2_2
+; X86-RAO-INT-NEXT: jmp .LBB2_1
+; X86-RAO-INT-NEXT: .LBB2_2: # %atomicrmw.end
+; X86-RAO-INT-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT: addl $16, %esp
+; X86-RAO-INT-NEXT: popl %esi
+; X86-RAO-INT-NEXT: retl
+;
+; X64-NO-RAOINT-LABEL: atomic_xor32:
+; X64-NO-RAOINT: # %bb.0:
+; X64-NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-RAOINT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: movl (%rdi), %eax
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start
+; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-NO-RAOINT-NEXT: movl %eax, %edx
+; X64-NO-RAOINT-NEXT: xorl %esi, %edx
+; X64-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%rcx)
+; X64-NO-RAOINT-NEXT: sete %cl
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: testb $1, %cl
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: jne .LBB2_2
+; X64-NO-RAOINT-NEXT: jmp .LBB2_1
+; X64-NO-RAOINT-NEXT: .LBB2_2: # %atomicrmw.end
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT: retq
+;
+; X64-RAO-INT-LABEL: atomic_xor32:
+; X64-RAO-INT: # %bb.0:
+; X64-RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-RAO-INT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: axorl %esi, (%rdi)
+; X64-RAO-INT-NEXT: movl (%rdi), %eax
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start
+; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-RAO-INT-NEXT: movl %eax, %edx
+; X64-RAO-INT-NEXT: xorl %esi, %edx
+; X64-RAO-INT-NEXT: lock cmpxchgl %edx, (%rcx)
+; X64-RAO-INT-NEXT: sete %cl
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: testb $1, %cl
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: jne .LBB2_2
+; X64-RAO-INT-NEXT: jmp .LBB2_1
+; X64-RAO-INT-NEXT: .LBB2_2: # %atomicrmw.end
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT: retq
+ %1 = atomicrmw xor i32* %p, i32 %val monotonic
+ %2 = atomicrmw xor i32* %p, i32 %val acquire
+ %3 = atomicrmw xor i32* %p, i32 %val release
+ %4 = atomicrmw xor i32* %p, i32 %val acq_rel
+ %5 = atomicrmw xor i32* %p, i32 %val seq_cst
+
+ %6 = atomicrmw xor i32* %p, i32 %val seq_cst
+ ret i32 %6
+}
+
+define i32 @atomic_and32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_and32:
+; X86-NO-RAOINT: # %bb.0:
+; X86-NO-RAOINT-NEXT: pushl %esi
+; X86-NO-RAOINT-NEXT: subl $16, %esp
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax)
+; X86-NO-RAOINT-NEXT: movl (%eax), %eax
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start
+; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NO-RAOINT-NEXT: movl %eax, %edx
+; X86-NO-RAOINT-NEXT: andl %esi, %edx
+; X86-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NO-RAOINT-NEXT: sete %cl
+; X86-NO-RAOINT-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: testb $1, %cl
+; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NO-RAOINT-NEXT: jne .LBB3_2
+; X86-NO-RAOINT-NEXT: jmp .LBB3_1
+; X86-NO-RAOINT-NEXT: .LBB3_2: # %atomicrmw.end
+; X86-NO-RAOINT-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NO-RAOINT-NEXT: addl $16, %esp
+; X86-NO-RAOINT-NEXT: popl %esi
+; X86-NO-RAOINT-NEXT: retl
+;
+; X86-RAO-INT-LABEL: atomic_and32:
+; X86-RAO-INT: # %bb.0:
+; X86-RAO-INT-NEXT: pushl %esi
+; X86-RAO-INT-NEXT: subl $16, %esp
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aandl %ecx, (%eax)
+; X86-RAO-INT-NEXT: movl (%eax), %eax
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start
+; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-RAO-INT-NEXT: movl %eax, %edx
+; X86-RAO-INT-NEXT: andl %esi, %edx
+; X86-RAO-INT-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-RAO-INT-NEXT: sete %cl
+; X86-RAO-INT-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-RAO-INT-NEXT: testb $1, %cl
+; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-RAO-INT-NEXT: jne .LBB3_2
+; X86-RAO-INT-NEXT: jmp .LBB3_1
+; X86-RAO-INT-NEXT: .LBB3_2: # %atomicrmw.end
+; X86-RAO-INT-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-RAO-INT-NEXT: addl $16, %esp
+; X86-RAO-INT-NEXT: popl %esi
+; X86-RAO-INT-NEXT: retl
+;
+; X64-NO-RAOINT-LABEL: atomic_and32:
+; X64-NO-RAOINT: # %bb.0:
+; X64-NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-RAOINT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi)
+; X64-NO-RAOINT-NEXT: movl (%rdi), %eax
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start
+; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-NO-RAOINT-NEXT: movl %eax, %edx
+; X64-NO-RAOINT-NEXT: andl %esi, %edx
+; X64-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%rcx)
+; X64-NO-RAOINT-NEXT: sete %cl
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: testb $1, %cl
+; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NO-RAOINT-NEXT: jne .LBB3_2
+; X64-NO-RAOINT-NEXT: jmp .LBB3_1
+; X64-NO-RAOINT-NEXT: .LBB3_2: # %atomicrmw.end
+; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-NO-RAOINT-NEXT: retq
+;
+; X64-RAO-INT-LABEL: atomic_and32:
+; X64-RAO-INT: # %bb.0:
+; X64-RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-RAO-INT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aandl %esi, (%rdi)
+; X64-RAO-INT-NEXT: movl (%rdi), %eax
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start
+; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
+; X64-RAO-INT-NEXT: movl %eax, %edx
+; X64-RAO-INT-NEXT: andl %esi, %edx
+; X64-RAO-INT-NEXT: lock cmpxchgl %edx, (%rcx)
+; X64-RAO-INT-NEXT: sete %cl
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: testb $1, %cl
+; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-RAO-INT-NEXT: jne .LBB3_2
+; X64-RAO-INT-NEXT: jmp .LBB3_1
+; X64-RAO-INT-NEXT: .LBB3_2: # %atomicrmw.end
+; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; X64-RAO-INT-NEXT: retq
+ %1 = atomicrmw and i32* %p, i32 %val monotonic
+ %2 = atomicrmw and i32* %p, i32 %val acquire
+ %3 = atomicrmw and i32* %p, i32 %val release
+ %4 = atomicrmw and i32* %p, i32 %val acq_rel
+ %5 = atomicrmw and i32* %p, i32 %val seq_cst
+
+ %6 = atomicrmw and i32* %p, i32 %val seq_cst
+ ret i32 %6
+}
+
+define i32 @atomic_sub32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_sub32:
+; X86-NO-RAOINT: # %bb.0:
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: negl %eax
+; X86-NO-RAOINT-NEXT: lock xaddl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT: retl
+;
+; X86-RAO-INT-LABEL: atomic_sub32:
+; X86-RAO-INT: # %bb.0:
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-RAO-INT-NEXT: negl %eax
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: mfence
+; X86-RAO-INT-NEXT: aaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: lock xaddl %eax, (%ecx)
+; X86-RAO-INT-NEXT: retl
+;
+; X64-NO-RAOINT-LABEL: atomic_sub32:
+; X64-NO-RAOINT: # %bb.0:
+; X64-NO-RAOINT-NEXT: movl %esi, %eax
+; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: negl %eax
+; X64-NO-RAOINT-NEXT: lock xaddl %eax, (%rdi)
+; X64-NO-RAOINT-NEXT: retq
+;
+; X64-RAO-INT-LABEL: atomic_sub32:
+; X64-RAO-INT: # %bb.0:
+; X64-RAO-INT-NEXT: movl %esi, %eax
+; X64-RAO-INT-NEXT: negl %eax
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: mfence
+; X64-RAO-INT-NEXT: aaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: lock xaddl %eax, (%rdi)
+; X64-RAO-INT-NEXT: retq
+ %1 = atomicrmw sub i32* %p, i32 %val monotonic
+ %2 = atomicrmw sub i32* %p, i32 %val acquire
+ %3 = atomicrmw sub i32* %p, i32 %val release
+ %4 = atomicrmw sub i32* %p, i32 %val acq_rel
+ %5 = atomicrmw sub i32* %p, i32 %val seq_cst
+
+ %6 = atomicrmw sub i32* %p, i32 %val seq_cst
+ ret i32 %6
+}
Index: llvm/lib/Target/X86/X86InstrRAOINT.td
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -0,0 +1,45 @@
+//===---- X86InstrRAOINT.td -------------------------------*- tablegen -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel RAO-INT
+// instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RAO-INT instructions
+
+def SDTRAOBinaryArith : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def X86rao_add : SDNode<"X86ISD::RADD", SDTRAOBinaryArith,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_or : SDNode<"X86ISD::ROR", SDTRAOBinaryArith,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_xor : SDNode<"X86ISD::RXOR", SDTRAOBinaryArith,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_and : SDNode<"X86ISD::RAND", SDTRAOBinaryArith,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+multiclass RAOINT_BASE<string OpcodeStr> {
+ let Predicates = [HasRAOINT] in
+ def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
+ [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
+ Sched<[WriteALURMW]>;
+
+ let Predicates = [HasRAOINT, In64BitMode] in
+ def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
+ [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
+ Sched<[WriteALURMW]>, REX_W;
+}
+
+defm AADD : RAOINT_BASE<"add">, T8PS;
+defm AAND : RAOINT_BASE<"and">, T8PD;
+defm AOR : RAOINT_BASE<"or" >, T8XD;
+defm AXOR : RAOINT_BASE<"xor">, T8XS;
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -980,6 +980,7 @@
def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
def HasKL : Predicate<"Subtarget->hasKL()">;
+def HasRAOINT : Predicate<"Subtarget->hasRAOINT()">;
def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">;
def HasHRESET : Predicate<"Subtarget->hasHRESET()">;
def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
@@ -3167,6 +3168,9 @@
// AMX instructions
include "X86InstrAMX.td"
+// RAO-INT instructions
+include "X86InstrRAOINT.td"
+
// System instructions.
include "X86InstrSystem.td"
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -793,6 +793,13 @@
LBTC,
LBTR,
+ /// RAO arithmetic instructions.
+ /// OUTCHAIN = RADD(INCHAIN, PTR, RHS)
+ RADD,
+ ROR,
+ RXOR,
+ RAND,
+
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31796,6 +31796,52 @@
return N;
}
+ // We can lower add/sub/or/xor/and into RAO-INT instructions when the result
+ // is unused.
+ // TODO: We can manually widen i8/i16 to i32 here to use RAO-INT instruction.
+ if (Subtarget.hasRAOINT() &&
+ (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) {
+ if (Opc == ISD::ATOMIC_LOAD_SUB) {
+ RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
+ AN->getMemOperand());
+ }
+ unsigned NewOpc = 0;
+ switch (N->getOpcode()) {
+ case ISD::ATOMIC_LOAD_ADD:
+ NewOpc = X86ISD::RADD;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ NewOpc = X86ISD::ROR;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ NewOpc = X86ISD::RXOR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ NewOpc = X86ISD::RAND;
+ break;
+ default:
+ llvm_unreachable("Unexpected ATOMIC_LOAD_ opcode");
+ }
+
+ // RAO-INT instructions are weakly-ordered. We need to insert an MFENCE for
+ // orderings stronger than monotonic.
+ // FIXME: Do we just need LFENCE for acquire?
+ // FIXME: Do we need trailing fence?
+ if (isStrongerThanMonotonic(AN->getSuccessOrdering())) {
+ assert(Subtarget.hasMFence() && "MFENCE is required");
+ Chain = DAG.getNode(X86ISD::MFENCE, DL, MVT::Other, Chain);
+ }
+
+ MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+ SDValue RAO = DAG.getMemIntrinsicNode(NewOpc, DL, DAG.getVTList(MVT::Other),
+ {Chain, LHS, RHS}, VT, MMO);
+
+ // NOTE: The getUNDEF is needed to give something for the unused result 0.
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+ RAO);
+ }
+
// Specialized lowering for the canonical form of an idemptotent atomicrmw.
// The core idea here is that since the memory location isn't actually
// changing, all we need is a lowering for the *ordering* impacts of the
@@ -33709,6 +33755,10 @@
NODE_NAME_CASE(LBTS)
NODE_NAME_CASE(LBTC)
NODE_NAME_CASE(LBTR)
+ NODE_NAME_CASE(RADD)
+ NODE_NAME_CASE(ROR)
+ NODE_NAME_CASE(RXOR)
+ NODE_NAME_CASE(RAND)
NODE_NAME_CASE(VZEXT_MOVL)
NODE_NAME_CASE(VZEXT_LOAD)
NODE_NAME_CASE(VEXTRACT_STORE)
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -254,6 +254,9 @@
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
"Support AMX-BF16 instructions",
[FeatureAMXTILE]>;
+def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
+ "Support RAO-INT instructions",
+ [FeatureSSE2]>;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
Index: llvm/lib/Support/X86TargetParser.cpp
===================================================================
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -581,6 +581,7 @@
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
+static constexpr FeatureBitset ImpliedFeaturesRAOINT = FeatureSSE2;
static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1805,6 +1805,7 @@
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Index: llvm/include/llvm/Support/X86TargetParser.def
===================================================================
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -200,6 +200,7 @@
X86_FEATURE (XSAVEOPT, "xsaveopt")
X86_FEATURE (XSAVES, "xsaves")
X86_FEATURE (HRESET, "hreset")
+X86_FEATURE (RAOINT, "raoint")
X86_FEATURE (AVX512FP16, "avx512fp16")
X86_FEATURE (AVXVNNI, "avxvnni")
// These features aren't really CPU features, but the frontend can set them.
Index: clang/test/Preprocessor/x86_target_features.c
===================================================================
--- clang/test/Preprocessor/x86_target_features.c
+++ clang/test/Preprocessor/x86_target_features.c
@@ -581,6 +581,14 @@
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mraoint -x c -E -dM -o - %s | FileCheck -check-prefix=RAOINT %s
+
+// RAOINT: #define __RAOINT__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mno-raoint -x c -E -dM -o - %s | FileCheck -check-prefix=NO-RAOINT %s
+
+// NO-RAOINT-NOT: #define __RAOINT__ 1
+
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
// CRC32: #define __CRC32__ 1
Index: clang/test/Driver/x86-target-features.c
===================================================================
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -305,6 +305,11 @@
// AVX512FP16: "-target-feature" "+avx512fp16"
// NO-AVX512FP16: "-target-feature" "-avx512fp16"
+// RUN: %clang --target=i386 -march=i386 -mraoint %s -### 2>&1 | FileCheck -check-prefix=RAOINT %s
+// RUN: %clang --target=i386 -march=i386 -mno-raoint %s -### 2>&1 | FileCheck -check-prefix=NO-RAOINT %s
+// RAOINT: "-target-feature" "+raoint"
+// NO-RAOINT: "-target-feature" "-raoint"
+
// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
Index: clang/lib/Headers/cpuid.h
===================================================================
--- clang/lib/Headers/cpuid.h
+++ clang/lib/Headers/cpuid.h
@@ -200,6 +200,7 @@
#define bit_AMXINT8 0x02000000
/* Features in %eax for leaf 7 sub-leaf 1 */
+#define bit_RAOINT 0x00000008
#define bit_AVXVNNI 0x00000010
#define bit_AVX512BF16 0x00000020
#define bit_HRESET 0x00400000
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -135,6 +135,7 @@
bool HasPTWRITE = false;
bool HasINVPCID = false;
bool HasENQCMD = false;
+ bool HasRAOINT = false;
bool HasKL = false; // For key locker
bool HasWIDEKL = false; // For wide key locker
bool HasHRESET = false;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -330,6 +330,8 @@
HasAMXINT8 = true;
} else if (Feature == "+amx-tile") {
HasAMXTILE = true;
+ } else if (Feature == "+raoint") {
+ HasRAOINT = true;
} else if (Feature == "+avxvnni") {
HasAVXVNNI = true;
} else if (Feature == "+serialize") {
@@ -774,6 +776,8 @@
Builder.defineMacro("__AMXINT8__");
if (HasAMXBF16)
Builder.defineMacro("__AMXBF16__");
+ if (HasRAOINT)
+ Builder.defineMacro("__RAOINT__");
if (HasAVXVNNI)
Builder.defineMacro("__AVXVNNI__");
if (HasSERIALIZE)
@@ -932,6 +936,7 @@
.Case("prefetchwt1", true)
.Case("prfchw", true)
.Case("ptwrite", true)
+ .Case("raoint", true)
.Case("rdpid", true)
.Case("rdpru", true)
.Case("rdrnd", true)
@@ -1028,6 +1033,7 @@
.Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
+ .Case("raoint", HasRAOINT)
.Case("rdpid", HasRDPID)
.Case("rdpru", HasRDPRU)
.Case("rdrnd", HasRDRND)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -4654,6 +4654,8 @@
def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
def mno_ptwrite : Flag<["-"], "mno-ptwrite">, Group<m_x86_Features_Group>;
+def mraoint : Flag<["-"], "mraoint">, Group<m_x86_Features_Group>;
+def mno_raoint : Flag<["-"], "mno-raoint">, Group<m_x86_Features_Group>;
def mrdpid : Flag<["-"], "mrdpid">, Group<m_x86_Features_Group>;
def mno_rdpid : Flag<["-"], "mno-rdpid">, Group<m_x86_Features_Group>;
def mrdpru : Flag<["-"], "mrdpru">, Group<m_x86_Features_Group>;
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -550,6 +550,7 @@
--------------------
- Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk.
- Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
+- Add support for ``RAO-INT`` instructions.
DWARF Support in Clang
----------------------
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits