jfb updated this revision to Diff 282281.
jfb marked 9 inline comments as done.
jfb added a comment.

Address Richard's comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79279/new/

https://reviews.llvm.org/D79279

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/DiagnosticASTKinds.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ExprConstant.cpp
  clang/lib/CodeGen/CGBuilder.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtin-overloaded-memfns.c
  clang/test/CodeGenObjC/builtin-memfns.m
  clang/test/Sema/builtin-overloaded-memfns.cpp
  clang/test/SemaCXX/constexpr-string.cpp

Index: clang/test/SemaCXX/constexpr-string.cpp
===================================================================
--- clang/test/SemaCXX/constexpr-string.cpp
+++ clang/test/SemaCXX/constexpr-string.cpp
@@ -675,4 +675,25 @@
     return true;
   }
   static_assert(test_address_of_incomplete_struct_type()); // expected-error {{constant}} expected-note {{in call}}
+
+  template <typename T, int ElNum>
+  constexpr auto test_memcpy_overloaded(int dst_off, int src_off, int num) {
+    T dst[4] = {0, 0, 0, 0};
+    const T src[4] = {1, 2, 3, 4};
+    // expected-note@+2 {{size parameter is 12, expected a size that is evenly divisible by element size 8}}
+    // expected-note@+1 {{size parameter is 4, expected a size that is evenly divisible by element size 8}}
+    __builtin_memcpy_overloaded(dst + dst_off, src + src_off, num * sizeof(T), ElNum * sizeof(T));
+    return result(dst);
+  }
+
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 1) == 1000);
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 2) == 1200);
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 3) == 1230);
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 4) == 1234);
+  static_assert(test_memcpy_overloaded<int, 2>(0, 0, 4) == 1234);
+
+  // expected-error@+1 {{static_assert expression is not an integral constant expression}}
+  static_assert(test_memcpy_overloaded<int, 2>(0, 0, 3) == 1234); // expected-note {{in call to 'test_memcpy_overloaded(0, 0, 3)'}}
+  // expected-error@+1 {{static_assert expression is not an integral constant expression}}
+  static_assert(test_memcpy_overloaded<int, 2>(0, 0, 1) == 1234); // expected-note {{in call to 'test_memcpy_overloaded(0, 0, 1)'}}
 }
Index: clang/test/Sema/builtin-overloaded-memfns.cpp
===================================================================
--- /dev/null
+++ clang/test/Sema/builtin-overloaded-memfns.cpp
@@ -0,0 +1,252 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -triple=arm64-unknown-unknown -fms-extensions -DCPY=1
+// RUN: %clang_cc1 %s -verify -fsyntax-only -triple=arm64-unknown-unknown -fms-extensions -DCPY=0
+
+// Test memcpy and memmove with the same code, since they have basically the same constraints.
+#if CPY
+#define MEM(...) __builtin_memcpy_overloaded(__VA_ARGS__)
+#else
+#define MEM(...) __builtin_memmove_overloaded(__VA_ARGS__)
+#endif
+
+#define NULL (void *)0
+#define nullptr __nullptr
+using size_t = __SIZE_TYPE__;
+using sizeless_t = __SVInt8_t;
+using float4 = float __attribute__((ext_vector_type(4)));
+struct Intish {
+  int i;
+};
+struct NotLockFree {
+  char buf[512];
+};
+struct TrivialCpy {
+  char buf[8];
+  TrivialCpy();
+  TrivialCpy(const TrivialCpy &) = default;
+};
+struct NotTrivialCpy {
+  char buf[8];
+  NotTrivialCpy();
+  NotTrivialCpy(const NotTrivialCpy &);
+};
+
+constexpr int CONSTEXPR_ONE = 1;
+
+void arg_count() {
+  MEM();                                      // expected-error {{too few arguments to function call, expected 3, have 0}}
+  MEM(0);                                     // expected-error {{too few arguments to function call, expected 3, have 1}}
+  MEM(0, 0);                                  // expected-error {{too few arguments to function call, expected 3, have 2}}
+  MEM(0, 0, 0, 0, 0);                         // expected-error {{too many arguments to function call, expected 4, have 5}}
+  __builtin_memset_overloaded();              // expected-error {{too few arguments to function call, expected 3, have 0}}
+  __builtin_memset_overloaded(0);             // expected-error {{too few arguments to function call, expected 3, have 1}}
+  __builtin_memset_overloaded(0, 0);          // expected-error {{too few arguments to function call, expected 3, have 2}}
+  __builtin_memset_overloaded(0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
+}
+
+void null(char *dst, const char *src, size_t size) {
+  MEM(0, src, 0);                              // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(0, src, size);                           // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, 0);                              // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, size);                           // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded(0, 0, 0);        // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded(0, 0, size);     // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, 42);                             // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, 42);                             // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, NULL, 42);                          // expected-warning {{null passed to a callee that requires a non-null argument}}
+  MEM(dst, nullptr, 42);                       // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'nullptr_t'}}
+  MEM(0, src, 42);                             // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(NULL, src, 42);                          // expected-warning {{null passed to a callee that requires a non-null argument}}
+  MEM(nullptr, src, 42);                       // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'nullptr_t'}}
+  __builtin_memset_overloaded(0, 0, 42);       // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded(NULL, 0, 42);    // expected-warning {{null passed to a callee that requires a non-null argument}}
+  __builtin_memset_overloaded(nullptr, 0, 42); // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'nullptr_t'}}
+}
+
+void good_arg_types(char *dst, const char *src, size_t size) {
+  MEM(dst, src, 0);
+  MEM(dst, dst, ~(size_t)0);
+  MEM(dst, src, 42);
+  MEM(dst, src, size);
+  MEM(dst, (char *)src, size);
+  MEM(dst, (const void *)src, size);
+  MEM((void *)dst, src, size);
+  MEM(dst, (volatile const char *)src, size);
+  MEM((volatile char *)dst, src, size);
+  MEM(dst, (__unaligned const char *)src, size);
+  MEM((__unaligned char *)dst, src, size);
+  MEM(dst, (const char *__restrict)src, size);
+  MEM((char *__restrict)dst, src, size);
+
+  MEM(dst, (const __attribute__((address_space(32))) char *)src, size);
+  MEM((__attribute__((address_space(32))) char *)dst, src, size);
+  MEM((__attribute__((address_space(32))) char *)dst, (const __attribute__((address_space(64))) char *)src, size);
+  MEM(dst, (__attribute__((address_space(32))) __unaligned const volatile void *__restrict)src, size);
+  MEM((__attribute__((address_space(32))) __unaligned volatile void *__restrict)dst, src, size);
+
+  MEM(dst, (const char *)src, size, 1);
+  MEM(dst, (const char *)src, size, 2);
+  MEM(dst, (const char *)src, size, 4);
+  MEM(dst, (const char *)src, size, 8);
+  MEM(dst, (const char *)src, size, 16);
+  MEM((char *)dst, src, size, 1);
+  MEM((int *)dst, (const Intish *)src, size, 4);
+  MEM((Intish *)dst, (const int *)src, size, 4);
+  MEM(dst, src, size, CONSTEXPR_ONE);
+
+  __builtin_memset_overloaded(dst, 0, 0);
+  __builtin_memset_overloaded(dst, 0, ~(size_t)0);
+  __builtin_memset_overloaded(dst, 0, 42);
+  __builtin_memset_overloaded(dst, 0, size);
+  __builtin_memset_overloaded((void *)dst, 0, size);
+  __builtin_memset_overloaded((volatile char *)dst, 0, size);
+  __builtin_memset_overloaded((__unaligned char *)dst, 0, size);
+  __builtin_memset_overloaded((int *)dst, 0, size);
+  __builtin_memset_overloaded((__attribute__((address_space(32))) char *)dst, 0, size);
+  __builtin_memset_overloaded((__attribute__((address_space(32))) __unaligned volatile void *)dst, 0, size);
+
+  __builtin_memset_overloaded((char *)dst, 0, size, 1);
+  __builtin_memset_overloaded((char *)dst, 0, size, 2);
+  __builtin_memset_overloaded((char *)dst, 0, size, 4);
+  __builtin_memset_overloaded((char *)dst, 0, size, 8);
+  __builtin_memset_overloaded((char *)dst, 0, size, 16);
+  __builtin_memset_overloaded((Intish *)dst, 0, size, 4);
+  __builtin_memset_overloaded(dst, 0, size, CONSTEXPR_ONE);
+}
+
+// expected-note@+1 2 {{declared here}}
+void bad_arg_types(char *dst, const char *src, size_t size) {
+  MEM(dst, 42, size);                                                                        // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(42, src, size);                                                                        // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, src, dst);                                                                        // expected-error {{cannot initialize a parameter of type 'unsigned long' with an lvalue of type 'char *'}}
+  MEM((const char *)dst, src, size);                                                         // expected-error {{argument must be non-const, got 'const char'}}
+  MEM((__attribute__((address_space(32))) __unaligned const volatile char *)dst, src, size); // expected-error {{argument must be non-const, got 'const volatile __unaligned __attribute__((address_space(32))) char'}}
+
+  MEM(dst, (_Atomic const char *)src, size);          // expected-error{{parameter cannot have the _Atomic qualifier ('const _Atomic(char)' invalid)}}
+  MEM((_Atomic char *)dst, src, size);                // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(char)' invalid)}}
+  MEM((int *)dst, (_Atomic const Intish *)src, size); // expected-error{{parameter cannot have the _Atomic qualifier ('const _Atomic(Intish)' invalid)}}
+  MEM((_Atomic Intish *)dst, (const int *)src, size); // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(Intish)' invalid)}}
+  MEM((void *)dst, (_Atomic const int *)src, size);   // expected-error{{parameter cannot have the _Atomic qualifier ('const _Atomic(int)' invalid)}}
+  MEM((_Atomic int *)dst, (const void *)src, size);   // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(int)' invalid)}}
+
+  // expected-note@+1 {{read of non-const variable 'size' is not allowed in a constant expression}}
+  MEM(dst, src, size, size);                                                    // expected-error{{expression is not an integral constant expression}}
+  MEM(dst, src, size, -1);                                                      // expected-error{{argument should be a power of 2}}
+  MEM(dst, src, size, 0);                                                       // expected-error{{argument should be a power of 2}}
+  MEM(dst, src, size, 3);                                                       // expected-error{{argument should be a power of 2}}
+  MEM(dst, src, size, 32);                                                      // expected-error{{lock-free}}
+  MEM((int *)dst, src, size, 1);                                                // expected-error{{number of bytes to copy must be a multiple of pointer element size, got 1 bytes to copy with element size 4 for 'int'}}
+  MEM(dst, (const int *)src, size, 1);                                          // expected-error{{number of bytes to copy must be a multiple of pointer element size, got 1 bytes to copy with element size 4 for 'const int'}}
+  MEM((NotLockFree *)dst, (const NotLockFree *)src, size, sizeof(NotLockFree)); // expected-error{{element size must be a lock-free size, 512 exceeds 16 bytes}}
+  MEM((void *)dst, src, size, 4);                                               // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('void' invalid)}}
+  MEM(dst, (const void *)src, size, 4);                                         // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('const void' invalid)}}
+  MEM(dst, (volatile const char *)src, size, 1);                                // expected-error{{specifying an access size for volatile memory operations is unsupported ('const volatile char' is volatile)}}
+  MEM((volatile char *)dst, src, size, 1);                                      // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+
+  __builtin_memset_overloaded(42, 0, size);                                                                        // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded((const char *)dst, 0, size);                                                         // expected-error {{argument must be non-const, got 'const char'}}
+  __builtin_memset_overloaded((__attribute__((address_space(32))) __unaligned const volatile char *)dst, 0, size); // expected-error {{argument must be non-const, got 'const volatile __unaligned __attribute__((address_space(32))) char'}}
+  __builtin_memset_overloaded((_Atomic char *)dst, 0, size);                                                       // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(char)' invalid)}}
+  __builtin_memset_overloaded((_Atomic Intish *)dst, 0, size);                                                     // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(Intish)' invalid)}}
+
+  // expected-note@+1 {{read of non-const variable 'size' is not allowed in a constant expression}}
+  __builtin_memset_overloaded(dst, 0, size, size);                               // expected-error{{expression is not an integral constant expression}}
+  __builtin_memset_overloaded(dst, 0, size, -1);                                 // expected-error{{argument should be a power of 2}}
+  __builtin_memset_overloaded(dst, 0, size, 0);                                  // expected-error{{argument should be a power of 2}}
+  __builtin_memset_overloaded(dst, 0, size, 3);                                  // expected-error{{argument should be a power of 2}}
+  __builtin_memset_overloaded(dst, 0, size, 32);                                 // expected-error{{lock-free}}
+  __builtin_memset_overloaded((int *)dst, 0, size, 1);                           // expected-error{{number of bytes to copy must be a multiple of pointer element size, got 1 bytes to copy with element size 4 for 'int'}}
+  __builtin_memset_overloaded((volatile char *)dst, 0, size, 1);                 // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+  __builtin_memset_overloaded((NotLockFree *)dst, 0, size, sizeof(NotLockFree)); // expected-error{{element size must be a lock-free size, 512 exceeds 16 bytes}}
+  __builtin_memset_overloaded((void *)dst, 0, size, 1);                          // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('void' invalid)}}
+}
+
+void array_arg_types() {
+  extern char adst[512];
+  extern volatile char avdst[512];
+  extern const char asrc[512];
+  extern const volatile char avsrc[512];
+
+  MEM(adst, asrc, sizeof(adst));
+  MEM(avdst, avsrc, sizeof(avdst));
+  MEM(asrc, asrc, sizeof(adst));     // expected-error {{argument must be non-const, got 'const char'}}
+  MEM(adst, asrc, sizeof(adst) + 1); // TODO: diagnose a size that exceeds the 512-byte arrays?
+  __builtin_memset_overloaded(adst, 0, sizeof(adst));
+  __builtin_memset_overloaded(avdst, 0, sizeof(avdst));
+  __builtin_memset_overloaded(asrc, 0, sizeof(asrc));     // expected-error {{argument must be non-const, got 'const char'}}
+  __builtin_memset_overloaded(adst, 0, sizeof(adst) + 1); // TODO: diagnose a size that exceeds the 512-byte array?
+}
+
+void atomic_array_arg_types() {
+  extern char adst[512];
+  extern volatile char avdst[512];
+  extern const char asrc[512];
+  extern const volatile char avsrc[512];
+
+  MEM(adst, asrc, sizeof(adst), 1);
+  MEM(avdst, asrc, sizeof(adst), 1); // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+  MEM(adst, avsrc, sizeof(adst), 1); // expected-error{{specifying an access size for volatile memory operations is unsupported ('const volatile char' is volatile)}}
+  __builtin_memset_overloaded(adst, 0, sizeof(adst), 1);
+  __builtin_memset_overloaded(avdst, 0, sizeof(avdst), 1); // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+}
+
+void trivial_arg_types() {
+  TrivialCpy trivialDst;
+  const TrivialCpy trivialSrc;
+  MEM(&trivialDst, &trivialSrc, sizeof(TrivialCpy));
+  MEM((__attribute__((address_space(32))) __unaligned volatile TrivialCpy * __restrict) & trivialDst, (__attribute__((address_space(64))) __unaligned const volatile TrivialCpy *__restrict) & trivialSrc, sizeof(TrivialCpy));
+  __builtin_memset_overloaded(&trivialDst, 0, sizeof(trivialDst));
+  __builtin_memset_overloaded((__attribute__((address_space(32))) __unaligned volatile TrivialCpy * __restrict) & trivialDst, 0, sizeof(trivialDst));
+
+  TrivialCpy trivialDstArr[2];
+  const TrivialCpy trivialSrcArr[2];
+  MEM(trivialDstArr, trivialSrcArr, sizeof(TrivialCpy) * 2);
+  __builtin_memset_overloaded(trivialDstArr, 0, sizeof(TrivialCpy) * 2);
+}
+
+void nontrivial_arg_types() {
+  NotTrivialCpy notTrivialDst;
+  const NotTrivialCpy notTrivialSrc;
+  MEM(&notTrivialDst, &notTrivialSrc, sizeof(NotTrivialCpy), sizeof(NotTrivialCpy));            // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+  __builtin_memset_overloaded(&notTrivialDst, 0, sizeof(NotTrivialCpy), sizeof(NotTrivialCpy)); // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+
+  NotTrivialCpy notTrivialDstArr[2];
+  const NotTrivialCpy notTrivialSrcArr[2];
+  MEM(notTrivialDstArr, notTrivialSrcArr, sizeof(NotTrivialCpy) * 2, sizeof(NotTrivialCpy));          // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+  __builtin_memset_overloaded(notTrivialDstArr, 0, sizeof(NotTrivialCpy) * 2, sizeof(NotTrivialCpy)); // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+}
+
+class Incomplete;
+void inclomplete_arg_types(char *dst, const char *src, size_t size) {
+  MEM((Incomplete *)dst, src, size, 1);                       // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('Incomplete' invalid)}}
+  MEM(dst, (const Incomplete *)src, size, 1);                 // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('const Incomplete' invalid)}}
+  __builtin_memset_overloaded((Incomplete *)dst, 0, size, 1); // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('Incomplete' invalid)}}
+}
+
+void sizeless_arg_types(char *dst, const char *src, size_t size) {
+  MEM((sizeless_t *)dst, src, size);
+  MEM(dst, (const sizeless_t *)src, size);
+  __builtin_memset_overloaded((sizeless_t *)dst, 0, size);
+
+  MEM((sizeless_t *)dst, src, size, 1);
+  MEM(dst, (const sizeless_t *)src, size, 1);
+  __builtin_memset_overloaded((sizeless_t *)dst, 0, size, 1);
+}
+
+void vector_arg_types(char *dst, const char *src, size_t size) {
+  MEM((float4 *)dst, src, size);
+  MEM(dst, (const float4 *)src, size);
+  __builtin_memset_overloaded((float4 *)dst, 0, size);
+
+  MEM((float4 *)dst, (const float4 *)src, size, sizeof(float4));
+  MEM((float4 *)dst, (const float4 *)src, size, sizeof(float4));
+  __builtin_memset_overloaded((float4 *)dst, 0, size, sizeof(float4));
+}
+
+void extint_arg_types(char *dst, const char *src, size_t size) {
+  MEM((_ExtInt(2) *)dst, src, size);
+  MEM(dst, (const _ExtInt(2) *)src, size);
+  __builtin_memset_overloaded((_ExtInt(2) *)dst, 0, size);
+
+  MEM((_ExtInt(8) *)dst, (const _ExtInt(8) *)src, size, 1);
+  __builtin_memset_overloaded((_ExtInt(8) *)dst, 0, size, 1);
+}
Index: clang/test/CodeGenObjC/builtin-memfns.m
===================================================================
--- clang/test/CodeGenObjC/builtin-memfns.m
+++ clang/test/CodeGenObjC/builtin-memfns.m
@@ -1,10 +1,38 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.8.0 -emit-llvm -o - %s | FileCheck %s
 
-void *memcpy(void *restrict s1, const void *restrict s2, unsigned long n);
+typedef __SIZE_TYPE__ size_t;
+
+void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
+void *memmove(void *restrict s1, const void *restrict s2, size_t n);
+void *memset(void *s1, int v, size_t n);
 
 // PR13697
-void test1(int *a, id b) {
-  // CHECK: @test1
+void cpy1(int *a, id b) {
+  // CHECK-LABEL: @cpy1(
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  memcpy(a, b, 8);
+}
+
+void cpy2(id a, int *b) {
+  // CHECK-LABEL: @cpy2(
   // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
   memcpy(a, b, 8);
 }
+
+void move1(int *a, id b) {
+  // CHECK-LABEL: @move1(
+  // CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  memmove(a, b, 8);
+}
+
+void move2(id a, int *b) {
+  // CHECK-LABEL: @move2(
+  // CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  memmove(a, b, 8);
+}
+
+void set(id a) {
+  // CHECK-LABEL: @set(
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 42, i64 8, i1 false)
+  memset(a, 42, 8);
+}
Index: clang/test/CodeGen/builtin-overloaded-memfns.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtin-overloaded-memfns.c
@@ -0,0 +1,337 @@
+// RUN: %clang_cc1 -triple arm64-unknown-unknown -fms-extensions -emit-llvm < %s| FileCheck %s
+
+typedef __SIZE_TYPE__ size_t;
+
+// CHECK-LABEL: volatile_dst_cpy_void(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_cpy_void(volatile void *dst, const void *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_move_void(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_move_void(volatile void *dst, const void *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_set_void(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_set_void(volatile void *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: volatile_src_cpy_void(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_src_cpy_void(void *dst, volatile const void *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_src_move_void(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_src_move_void(void *dst, volatile const void *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dstsrc_cpy_void(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dstsrc_cpy_void(volatile void *dst, volatile const void *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dstsrc_move_void(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dstsrc_move_void(volatile void *dst, volatile const void *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_cpy_char(volatile char *dst, const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_move_char(volatile char *dst, const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_set_char(volatile char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: volatile_dst_cpy_int(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_cpy_int(volatile int *dst, const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_move_int(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_move_int(volatile int *dst, const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_set_int(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_set_int(volatile int *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: unaligned_dst_cpy_int(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_dst_cpy_int(__unaligned int *dst, const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: unaligned_dst_move_int(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_dst_move_int(__unaligned int *dst, const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: unaligned_dst_set_int(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void unaligned_dst_set_int(__unaligned int *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: unaligned_src_cpy_int(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_src_cpy_int(int *dst, __unaligned const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: unaligned_src_move_int(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_src_move_int(int *dst, __unaligned const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: addrspace_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p32i8.p32i8.i64(i8 addrspace(32)* align 1 %{{[0-9]*}}, i8 addrspace(32)* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void addrspace_srcdst_cpy_char(__attribute__((address_space(32))) char *dst, __attribute__((address_space(32))) const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: addrspace_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p32i8.p32i8.i64(i8 addrspace(32)* align 1 %{{[0-9]*}}, i8 addrspace(32)* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void addrspace_srcdst_move_char(__attribute__((address_space(32))) char *dst, __attribute__((address_space(32))) const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: addrspace_dst_set_char(
+// CHECK: call void @llvm.memset.p32i8.i64(i8 addrspace(32)* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void addrspace_dst_set_char(__attribute__((address_space(32))) char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: restrict_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void restrict_srcdst_cpy_char(char *__restrict dst, const char *__restrict src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: restrict_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void restrict_srcdst_move_char(char *__restrict dst, const char *__restrict src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: restrict_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void restrict_dst_set_char(char *__restrict dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: atomic_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_srcdst_cpy_char(char *dst, const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, 1); }
+
+#if 0 // FIXME: open question on alignment.
+// FIXME-CHECK-LABEL: atomic_srcdst_cpy_char_big(
+// FIXME-CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 16)
+void atomic_srcdst_cpy_char_big(char *dst, const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, 16); }
+#endif
+
+// FIXME-CHECK-LABEL: atomic_srcdst_move_char(
+// FIXME-CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_srcdst_move_char(char *dst, const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, 1); }
+
+#if 0 // FIXME: open question on alignment.
+// FIXME-CHECK-LABEL: atomic_srcdst_move_char_big(
+// FIXME-CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 16)
+void atomic_srcdst_move_char_big(char *dst, const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, 16); }
+#endif
+
+// CHECK-LABEL: atomic_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 1)
+void atomic_dst_set_char(char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, 1); }
+
+#if 0 // FIXME: open question on alignment.
+// FIXME-CHECK-LABEL: atomic_dst_set_char_big(
+// FIXME-CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 16)
+void atomic_dst_set_char_big(char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, 16); }
+#endif
+
+// CHECK-LABEL: atomic_srcdst_cpy_int(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 4)
+void atomic_srcdst_cpy_int(int *dst, const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, 4); }
+
+// CHECK-LABEL: atomic_srcdst_move_int(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 4)
+void atomic_srcdst_move_int(int *dst, const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, 4); }
+
+// CHECK-LABEL: atomic_dst_set_int(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 4)
+void atomic_dst_set_int(int *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, 4); }
+
+// CHECK-LABEL: atomic_srcdst_cpy_longlong(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %{{[0-9]*}}, i8* align 8 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 8)
+void atomic_srcdst_cpy_longlong(long long *dst, const long long *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, sizeof(long long)); }
+
+// CHECK-LABEL: atomic_srcdst_move_longlong(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %{{[0-9]*}}, i8* align 8 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 8)
+void atomic_srcdst_move_longlong(long long *dst, const long long *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, sizeof(long long)); }
+
+// CHECK-LABEL: atomic_dst_set_longlong(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 8)
+void atomic_dst_set_longlong(long long *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, sizeof(long long)); }
+
+// CHECK-LABEL: atomic_static_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_static_srcdst_cpy_char(char dst[static 2], const char src[2], size_t size) { __builtin_memcpy_overloaded(dst, src, size, 1); }
+
+// CHECK-LABEL: atomic_static_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_static_srcdst_move_char(char dst[static 2], const char src[2], size_t size) { __builtin_memmove_overloaded(dst, src, size, 1); }
+
+// CHECK-LABEL: atomic_static_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 1)
+void atomic_static_dst_set_char(char dst[static 2], size_t size) { __builtin_memset_overloaded(dst, 0, size, 1); }
+
+extern char dst_atomic[2];
+extern const char src_atomic[2];
+
+// CHECK-LABEL: atomic_array_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_array_srcdst_cpy_char(size_t size) { __builtin_memcpy_overloaded(dst_atomic, src_atomic, size, 1); }
+
+// CHECK-LABEL: atomic_array_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_array_srcdst_move_char(size_t size) { __builtin_memmove_overloaded(dst_atomic, src_atomic, size, 1); }
+
+// CHECK-LABEL: atomic_array_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i32 1)
+void atomic_array_dst_set_char(size_t size) { __builtin_memset_overloaded(dst_atomic, 0, size, 1); }
+
+// CHECK-LABEL: atomic_local_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 4, i32 4)
+void atomic_local_srcdst_cpy_char(size_t size) {
+  int dst;
+  const int src;
+  __builtin_memcpy_overloaded(&dst, &src, sizeof(dst), sizeof(dst));
+}
+
+// CHECK-LABEL: atomic_local_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 4, i32 4)
+void atomic_local_srcdst_move_char(size_t size) {
+  int dst;
+  const int src;
+  __builtin_memmove_overloaded(&dst, &src, sizeof(dst), sizeof(dst));
+}
+
+// CHECK-LABEL: atomic_local_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8 0, i64 4, i32 4)
+void atomic_local_dst_set_char(size_t size) {
+  int dst;
+  __builtin_memset_overloaded(&dst, 0, sizeof(dst), sizeof(dst));
+}
+
+// CHECK-LABEL: vla_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9a-z]*}}, i8* align 1 %{{[0-9a-z]*}}, i64 %{{[0-9]*}}, i1 true)
+void vla_srcdst_cpy_char(size_t size) {
+  volatile char dst[size];
+  const volatile char src[size];
+  __builtin_memcpy_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: vla_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9a-z]*}}, i8* align 1 %{{[0-9a-z]*}}, i64 %{{[0-9]*}}, i1 true)
+void vla_srcdst_move_char(size_t size) {
+  volatile char dst[size];
+  const volatile char src[size];
+  __builtin_memmove_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: vla_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9a-z]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void vla_dst_set_char(size_t size) {
+  volatile char dst[size];
+  __builtin_memset_overloaded(dst, 0, size);
+}
+
+// CHECK-LABEL: static_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void static_srcdst_cpy_char(char dst[static 42], const char src[static 42], size_t size) {
+  __builtin_memcpy_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: static_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void static_srcdst_move_char(char dst[static 42], const char src[static 42], size_t size) {
+  __builtin_memmove_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: static_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void static_dst_set_char(char dst[static 42], size_t size) {
+  __builtin_memset_overloaded(dst, 0, size);
+}
+
+extern char dst_unsized[];
+extern volatile char dst_vunsized[];
+extern const char src_cunsized[];
+extern const volatile char src_cvunsized[];
+
+// CHECK-LABEL: array_volatile_unsized_dst_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dst_cpy(size_t size) { __builtin_memcpy_overloaded(dst_vunsized, src_cunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dst_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dst_move(size_t size) { __builtin_memmove_overloaded(dst_vunsized, src_cunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dst_set(size_t size) { __builtin_memset_overloaded(dst_vunsized, 0, size); }
+
+// CHECK-LABEL: array_volatile_unsized_src_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_src_cpy(size_t size) { __builtin_memcpy_overloaded(dst_unsized, src_cvunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_src_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_src_move(size_t size) { __builtin_memmove_overloaded(dst_unsized, src_cvunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_vunsized, src_cvunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_vunsized, src_cvunsized, size); }
+
+extern __attribute__((aligned(128))) char dst_512[512];
+extern __attribute__((aligned(128))) volatile char dst_v512[512];
+extern __attribute__((aligned(128))) const char src_c512[512];
+extern __attribute__((aligned(128))) const volatile char src_cv512[512];
+
+// CHECK-LABEL: array_volatile_dst_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dst_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512, src_c512, size); }
+
+// CHECK-LABEL: array_volatile_dst_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dst_move(size_t size) { __builtin_memmove_overloaded(dst_v512, src_c512, size); }
+
+// CHECK-LABEL: array_volatile_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dst_set(size_t size) { __builtin_memset_overloaded(dst_v512, 0, size); }
+
+// CHECK-LABEL: array_volatile_src_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_src_cpy(size_t size) { __builtin_memcpy_overloaded(dst_512, src_cv512, size); }
+
+// CHECK-LABEL: array_volatile_src_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_src_move(size_t size) { __builtin_memmove_overloaded(dst_512, src_cv512, size); }
+
+// CHECK-LABEL: array_volatile_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512, src_cv512, size); }
+
+// CHECK-LABEL: array_volatile_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_v512, src_cv512, size); }
+
+extern __attribute__((aligned(128))) volatile char dst_v512_32[512][32];
+extern __attribute__((aligned(128))) const volatile char src_cv512_32[512][32];
+
+// CHECK-LABEL: multiarray_volatile_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_volatile_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512_32, src_cv512_32, size); }
+
+// CHECK-LABEL: multiarray_volatile_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_volatile_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_v512_32, src_cv512_32, size); }
+
+// CHECK-LABEL: multiarray_volatile_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void multiarray_volatile_dst_set(size_t size) { __builtin_memset_overloaded(dst_v512_32, 0, size); }
+
+// CHECK-LABEL: multiarray_idx_volatile_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 getelementptr {{.*}}, i8* align 32 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_idx_volatile_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512_32[1], src_cv512_32[1], size); }
+
+// CHECK-LABEL: multiarray_idx_volatile_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 32 getelementptr {{.*}}, i8* align 32 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_idx_volatile_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_v512_32[1], src_cv512_32[1], size); }
+
+// CHECK-LABEL: multiarray_idx_volatile_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 32 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void multiarray_idx_volatile_dst_set(size_t size) { __builtin_memset_overloaded(dst_v512_32[1], 0, size); }
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1716,6 +1716,11 @@
     }
     break;
   }
+  case Builtin::BI__builtin_memcpy_overloaded:
+  case Builtin::BI__builtin_memmove_overloaded:
+    return SemaBuiltinMemcpyOverloaded(TheCallResult);
+  case Builtin::BI__builtin_memset_overloaded:
+    return SemaBuiltinMemsetOverloaded(TheCallResult);
 #define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
   case Builtin::BI##ID: \
@@ -5428,6 +5433,266 @@
   return TheCallResult;
 }
 
+/// Perform semantic checking for __builtin_memcpy_overloaded and
+/// __builtin_memmove_overloaded, which are overloaded based on the pointer
+/// types of the destination and source arguments.
+ExprResult Sema::SemaBuiltinMemcpyOverloaded(ExprResult TheCallResult) {
+  CallExpr *TheCall = static_cast<CallExpr *>(TheCallResult.get());
+
+  unsigned argCount = TheCall->getNumArgs();
+  if (argCount != 3 && argCount != 4) {
+    if (argCount < 3)
+      return ExprError(
+          Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args)
+          << 0 /*function call*/ << 3 << argCount << TheCall->getSourceRange());
+    SourceRange excessRange(TheCall->getArg(4)->getBeginLoc(),
+                            TheCall->getArg(argCount - 1)->getEndLoc());
+    return ExprError(
+        Diag(excessRange.getBegin(), diag::err_typecheck_call_too_many_args)
+        << 0 /*function call*/ << 4 << argCount
+        << excessRange);
+  }
+  // The optional fourth argument is the element size.
+  bool HasElSz = argCount == 4;
+
+  ExprResult DstPtr = DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (DstPtr.isInvalid())
+    return ExprError();
+  clang::Expr *DstOp = DstPtr.get();
+  TheCall->setArg(0, DstOp);
+
+  ExprResult SrcPtr = DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+  if (SrcPtr.isInvalid())
+    return ExprError();
+  clang::Expr *SrcOp = SrcPtr.get();
+  TheCall->setArg(1, SrcOp);
+
+  const PointerType *DstTy = DstOp->getType()->getAs<PointerType>();
+  const PointerType *SrcTy = SrcOp->getType()->getAs<PointerType>();
+  if (!DstTy)
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), diag::err_init_conversion_failed)
+        << InitializedEntity::EK_Parameter << Context.VoidPtrTy
+        << DstOp->isLValue() << DstOp->getType() << /*no difference*/ 0
+        << DstOp->getSourceRange());
+  if (!SrcTy)
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), diag::err_init_conversion_failed)
+        << InitializedEntity::EK_Parameter << Context.VoidPtrTy
+        << SrcOp->isLValue() << SrcOp->getType() << /*no difference*/ 0
+        << SrcOp->getSourceRange());
+
+  QualType DstValTy = DstTy->getPointeeType();
+  QualType SrcValTy = SrcTy->getPointeeType();
+
+  if (DstValTy.isConstQualified())
+    return ExprError(Diag(TheCall->getBeginLoc(), PDiag(diag::err_const_arg))
+                     << DstValTy << DstOp->getSourceRange());
+  if (DstValTy->isAtomicType())
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), PDiag(diag::err_atomic_qualifier_invalid))
+        << DstValTy << DstOp->getSourceRange());
+  if (SrcValTy->isAtomicType())
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), PDiag(diag::err_atomic_qualifier_invalid))
+        << SrcValTy << SrcOp->getSourceRange());
+
+  ExprResult SizeRes(TheCall->getArg(2));
+  InitializedEntity SizeEntity = InitializedEntity::InitializeParameter(
+      Context, Context.getSizeType(), false);
+  SizeRes = PerformCopyInitialization(SizeEntity, SourceLocation(), SizeRes);
+  if (SizeRes.isInvalid())
+    return ExprError();
+  TheCall->setArg(2, SizeRes.get());
+  // Only check the pointers for null when the size is known to be nonzero.
+  bool IsNonZero;
+  if (!SizeRes.get()->isValueDependent() &&
+      SizeRes.get()->EvaluateAsBooleanCondition(IsNonZero, Context) &&
+      IsNonZero) {
+    CheckNonNullArgument(*this, DstOp, TheCall->getExprLoc());
+    CheckNonNullArgument(*this, SrcOp, TheCall->getExprLoc());
+  }
+  // The element-size form places extra restrictions on the operands.
+  if (HasElSz) {
+    clang::Expr *Arg = TheCall->getArg(3);
+
+    if (!DstValTy.isTriviallyCopyableType(Context))
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_atomic_op_needs_trivial_copy))
+                       << DstValTy << DstOp->getSourceRange());
+    if (!SrcValTy.isTriviallyCopyableType(Context))
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_atomic_op_needs_trivial_copy))
+                       << SrcValTy << SrcOp->getSourceRange());
+    if (DstValTy.isVolatileQualified())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_sized_volatile_unsupported))
+                       << DstValTy << DstOp->getSourceRange());
+    if (SrcValTy.isVolatileQualified())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_sized_volatile_unsupported))
+                       << SrcValTy << SrcOp->getSourceRange());
+
+    if (!Arg->isValueDependent()) {
+      llvm::APSInt ElSz;
+      ExprResult ElSzRes(VerifyIntegerConstantExpression(Arg, &ElSz));
+      if (ElSzRes.isInvalid())
+        return ExprError();
+      TheCall->setArg(3, ElSzRes.get());
+
+      if (!ElSz.isStrictlyPositive() || !ElSz.isPowerOf2())
+        return ExprError(
+            Diag(TheCall->getBeginLoc(), diag::err_argument_not_power_of_2)
+            << Arg->getSourceRange());
+      int InlineWidth =
+          Context
+              .toCharUnitsFromBits(
+                  Context.getTargetInfo().getMaxAtomicInlineWidth())
+              .getQuantity();
+      if (ElSz.ugt(InlineWidth))
+        return ExprError(Diag(TheCall->getBeginLoc(),
+                              PDiag(diag::err_elsz_must_be_lock_free))
+                         << (int)ElSz.getLimitedValue() << InlineWidth
+                         << Arg->getSourceRange());
+
+      if (int DstElSz = Context.getTypeSizeInChars(DstValTy).getQuantity()) {
+        if (ElSz.urem(DstElSz) != 0)
+          return ExprError(
+              Diag(TheCall->getBeginLoc(),
+                   PDiag(diag::err_atomic_builtin_ext_size_mismatches_el))
+              << (int)ElSz.getLimitedValue() << DstElSz << DstValTy
+              << DstOp->getSourceRange() << Arg->getSourceRange());
+      }
+
+      if (int SrcElSz = Context.getTypeSizeInChars(SrcValTy).getQuantity()) {
+        if (ElSz.urem(SrcElSz) != 0)
+          return ExprError(
+              Diag(TheCall->getBeginLoc(),
+                   PDiag(diag::err_atomic_builtin_ext_size_mismatches_el))
+              << (int)ElSz.getLimitedValue() << SrcElSz << SrcValTy
+              << SrcOp->getSourceRange() << Arg->getSourceRange());
+      }
+    }
+  }
+
+  return TheCallResult;
+}
+
+/// Perform semantic checking for __builtin_memset_overloaded, which is
+/// overloaded based on the pointer type of the destination argument and
+/// accepts an optional fourth argument giving the element size.
+ExprResult Sema::SemaBuiltinMemsetOverloaded(ExprResult TheCallResult) {
+  CallExpr *TheCall = static_cast<CallExpr *>(TheCallResult.get());
+
+  unsigned argCount = TheCall->getNumArgs();
+  if (argCount != 3 && argCount != 4) {
+    if (argCount < 3)
+      return ExprError(
+          Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args)
+          << 0 /*function call*/ << 3 << argCount << TheCall->getSourceRange());
+    SourceRange excessRange(TheCall->getArg(4)->getBeginLoc(),
+                            TheCall->getArg(argCount - 1)->getEndLoc());
+    return ExprError(
+        Diag(excessRange.getBegin(), diag::err_typecheck_call_too_many_args)
+        << 0 /*function call*/ << 4 << argCount
+        << excessRange);
+  }
+  // The optional fourth argument is the element size.
+  bool HasElSz = argCount == 4;
+
+  ExprResult DstPtr = DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (DstPtr.isInvalid())
+    return ExprError();
+  clang::Expr *DstOp = DstPtr.get();
+  TheCall->setArg(0, DstOp);
+
+  const PointerType *DstTy = DstOp->getType()->getAs<PointerType>();
+  if (!DstTy)
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), diag::err_init_conversion_failed)
+        << InitializedEntity::EK_Parameter << Context.VoidPtrTy
+        << DstOp->isLValue() << DstOp->getType() << /*no difference*/ 0
+        << DstOp->getSourceRange());
+
+  QualType DstValTy = DstTy->getPointeeType();
+  if (DstValTy.isConstQualified())
+    return ExprError(Diag(TheCall->getBeginLoc(), PDiag(diag::err_const_arg))
+                     << DstValTy << DstOp->getSourceRange());
+  if (DstValTy->isAtomicType())
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), PDiag(diag::err_atomic_qualifier_invalid))
+        << DstValTy << DstOp->getSourceRange());
+
+  ExprResult ValRes(TheCall->getArg(1));
+  InitializedEntity ValEntity = InitializedEntity::InitializeParameter(
+      Context, Context.UnsignedCharTy, false);
+  ValRes = PerformCopyInitialization(ValEntity, SourceLocation(), ValRes);
+  if (ValRes.isInvalid())
+    return ExprError();
+  TheCall->setArg(1, ValRes.get());
+
+  ExprResult SizeRes(TheCall->getArg(2));
+  InitializedEntity SizeEntity = InitializedEntity::InitializeParameter(
+      Context, Context.getSizeType(), false);
+  SizeRes = PerformCopyInitialization(SizeEntity, SourceLocation(), SizeRes);
+  if (SizeRes.isInvalid())
+    return ExprError();
+  TheCall->setArg(2, SizeRes.get());
+  // Only check the pointer for null when the size is known to be nonzero.
+  bool IsNonZero;
+  if (!SizeRes.get()->isValueDependent() &&
+      SizeRes.get()->EvaluateAsBooleanCondition(IsNonZero, Context) &&
+      IsNonZero)
+    CheckNonNullArgument(*this, DstOp, TheCall->getExprLoc());
+  // The element-size form places extra restrictions on the destination.
+  if (HasElSz) {
+    clang::Expr *Arg = TheCall->getArg(3);
+
+    if (!DstValTy.isTriviallyCopyableType(Context))
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_atomic_op_needs_trivial_copy))
+                       << DstValTy << DstOp->getSourceRange());
+    if (DstValTy.isVolatileQualified())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_sized_volatile_unsupported))
+                       << DstValTy << DstOp->getSourceRange());
+
+    if (!Arg->isValueDependent()) {
+      llvm::APSInt ElSz;
+      ExprResult ElSzRes(VerifyIntegerConstantExpression(Arg, &ElSz));
+      if (ElSzRes.isInvalid())
+        return ExprError();
+      TheCall->setArg(3, ElSzRes.get());
+
+      if (!ElSz.isStrictlyPositive() || !ElSz.isPowerOf2())
+        return ExprError(
+            Diag(TheCall->getBeginLoc(), diag::err_argument_not_power_of_2)
+            << Arg->getSourceRange());
+      int InlineWidth =
+          Context
+              .toCharUnitsFromBits(
+                  Context.getTargetInfo().getMaxAtomicInlineWidth())
+              .getQuantity();
+      if (ElSz.ugt(InlineWidth))
+        return ExprError(Diag(TheCall->getBeginLoc(),
+                              PDiag(diag::err_elsz_must_be_lock_free))
+                         << (int)ElSz.getLimitedValue() << InlineWidth
+                         << Arg->getSourceRange());
+
+      if (int DstElSz = Context.getTypeSizeInChars(DstValTy).getQuantity()) {
+        if (ElSz.urem(DstElSz) != 0)
+          return ExprError(
+              Diag(TheCall->getBeginLoc(),
+                   PDiag(diag::err_atomic_builtin_ext_size_mismatches_el))
+              << (int)ElSz.getLimitedValue() << DstElSz << DstValTy
+              << DstOp->getSourceRange() << Arg->getSourceRange());
+      }
+    }
+  }
+
+  return TheCallResult;
+}
+
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
 /// Note: It might also make sense to do the UTF-16 conversion here (would
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -1061,8 +1061,8 @@
 //                         LValue Expression Emission
 //===----------------------------------------------------------------------===//
 
-/// EmitPointerWithAlignment - Given an expression of pointer type, try to
-/// derive a more accurate bound on the alignment of the pointer.
+/// Given an expression of pointer type, try to derive a more accurate bound on
+/// the alignment of the pointer.
 Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
                                                   LValueBaseInfo *BaseInfo,
                                                   TBAAAccessInfo *TBAAInfo) {
@@ -1164,6 +1164,9 @@
 
   // TODO: conditional operators, comma.
 
+  if (E->getType()->isArrayType())
+    return EmitArrayToPointerDecay(E, BaseInfo, TBAAInfo);
+
   // Otherwise, use the alignment of the type.
   CharUnits Align =
       CGM.getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, TBAAInfo);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -625,6 +625,16 @@
   return {Width, Signed};
 }
 
+static QualType getPtrArgType(CodeGenModule &CGM, const CallExpr *E,
+                              unsigned ArgNo) {
+  QualType Ty = E->getArg(ArgNo)->IgnoreImpCasts()->getType();
+  if (const auto *Arr = CGM.getContext().getAsArrayType(Ty))
+    return Arr->getElementType(); // Array argument: use its element type.
+  return Ty->isObjCObjectPointerType()
+             ? Ty->castAs<clang::ObjCObjectPointerType>()->getPointeeType()
+             : Ty->castAs<clang::PointerType>()->getPointeeType();
+}
+
 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
   llvm::Type *DestType = Int8PtrTy;
   if (ArgValue->getType() != DestType)
@@ -2623,16 +2633,30 @@
   }
   case Builtin::BImemcpy:
   case Builtin::BI__builtin_memcpy:
+  case Builtin::BI__builtin_memcpy_overloaded:
   case Builtin::BImempcpy:
   case Builtin::BI__builtin_mempcpy: {
+    QualType DestTy = getPtrArgType(CGM, E, 0);
+    QualType SrcTy = getPtrArgType(CGM, E, 1);
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Address Src = EmitPointerWithAlignment(E->getArg(1));
+    bool isVolatile =
+        DestTy.isVolatileQualified() || SrcTy.isVolatileQualified();
+    bool isAtomic = E->getNumArgs() == 4;
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                         E->getArg(1)->getExprLoc(), FD, 1);
-    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
+    if (isAtomic) {
+      // FIXME add UBSan builtin check for alignment and Size / ElSz.
+      auto ElSz =
+          CharUnits::fromQuantity(E->getArg(3)
+                                      ->getIntegerConstantExpr(CGM.getContext())
+                                      ->getLimitedValue());
+      Builder.CreateElementUnorderedAtomicMemCpy(Dest, Src, SizeVal, ElSz);
+    } else
+      Builder.CreateMemCpy(Dest, Src, SizeVal, isVolatile);
     if (BuiltinID == Builtin::BImempcpy ||
         BuiltinID == Builtin::BI__builtin_mempcpy)
       return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), SizeVal));
@@ -2701,26 +2725,52 @@
   }
 
   case Builtin::BImemmove:
-  case Builtin::BI__builtin_memmove: {
+  case Builtin::BI__builtin_memmove:
+  case Builtin::BI__builtin_memmove_overloaded: {
+    QualType DestTy = getPtrArgType(CGM, E, 0);
+    QualType SrcTy = getPtrArgType(CGM, E, 1);
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Address Src = EmitPointerWithAlignment(E->getArg(1));
+    bool isVolatile =
+        DestTy.isVolatileQualified() || SrcTy.isVolatileQualified();
+    bool isAtomic = E->getNumArgs() == 4;
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                         E->getArg(1)->getExprLoc(), FD, 1);
-    Builder.CreateMemMove(Dest, Src, SizeVal, false);
+    if (isAtomic) {
+      // FIXME add UBSan builtin check for alignment and Size / ElSz.
+      auto ElSz =
+          CharUnits::fromQuantity(E->getArg(3)
+                                      ->getIntegerConstantExpr(CGM.getContext())
+                                      ->getLimitedValue());
+      Builder.CreateElementUnorderedAtomicMemMove(Dest, Src, SizeVal, ElSz);
+    } else
+      Builder.CreateMemMove(Dest, Src, SizeVal, isVolatile);
     return RValue::get(Dest.getPointer());
   }
   case Builtin::BImemset:
-  case Builtin::BI__builtin_memset: {
+  case Builtin::BI__builtin_memset:
+  case Builtin::BI__builtin_memset_overloaded: {
+    QualType DestTy = getPtrArgType(CGM, E, 0);
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                          Builder.getInt8Ty());
+    bool isVolatile = DestTy.isVolatileQualified();
+    bool isAtomic = E->getNumArgs() == 4;
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
-    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
+    if (isAtomic) {
+      // FIXME add UBSan builtin check for alignment and Size / ElSz.
+      auto ElSz =
+          CharUnits::fromQuantity(E->getArg(3)
+                                      ->getIntegerConstantExpr(CGM.getContext())
+                                      ->getLimitedValue());
+      Builder.CreateElementUnorderedAtomicMemSet(Dest, ByteVal, SizeVal, ElSz);
+    } else
+      Builder.CreateMemSet(Dest, ByteVal, SizeVal, isVolatile);
     return RValue::get(Dest.getPointer());
   }
   case Builtin::BI__builtin___memset_chk: {
Index: clang/lib/CodeGen/CGBuilder.h
===================================================================
--- clang/lib/CodeGen/CGBuilder.h
+++ clang/lib/CodeGen/CGBuilder.h
@@ -279,6 +279,15 @@
                         IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateElementUnorderedAtomicMemCpy;
+  llvm::CallInst *CreateElementUnorderedAtomicMemCpy(Address Dest, Address Src,
+                                                     llvm::Value *Size,
+                                                     CharUnits ElementSize) {
+    return CreateElementUnorderedAtomicMemCpy(
+        Dest.getPointer(), Dest.getAlignment().getAsAlign(), Src.getPointer(),
+        Src.getAlignment().getAsAlign(), Size, ElementSize.getQuantity());
+  }
+
   using CGBuilderBaseTy::CreateMemCpyInline;
   llvm::CallInst *CreateMemCpyInline(Address Dest, Address Src, uint64_t Size) {
     return CreateMemCpyInline(
@@ -294,6 +303,15 @@
                          Size, IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateElementUnorderedAtomicMemMove;
+  llvm::CallInst *CreateElementUnorderedAtomicMemMove(Address Dest, Address Src,
+                                                      llvm::Value *Size,
+                                                      CharUnits ElementSize) {
+    return CreateElementUnorderedAtomicMemMove(
+        Dest.getPointer(), Dest.getAlignment().getAsAlign(), Src.getPointer(),
+        Src.getAlignment().getAsAlign(), Size, ElementSize.getQuantity());
+  }
+
   using CGBuilderBaseTy::CreateMemSet;
   llvm::CallInst *CreateMemSet(Address Dest, llvm::Value *Value,
                                llvm::Value *Size, bool IsVolatile = false) {
@@ -301,6 +319,16 @@
                         Dest.getAlignment().getAsAlign(), IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateElementUnorderedAtomicMemSet;
+  llvm::CallInst *CreateElementUnorderedAtomicMemSet(Address Dest,
+                                                     llvm::Value *Value,
+                                                     llvm::Value *Size,
+                                                     CharUnits ElementSize) {
+    return CreateElementUnorderedAtomicMemSet(Dest.getPointer(), Value, Size,
+                                              Dest.getAlignment().getAsAlign(),
+                                              ElementSize.getQuantity());
+  }
+
   using CGBuilderBaseTy::CreatePreserveStructAccessIndex;
   Address CreatePreserveStructAccessIndex(Address Addr,
                                           unsigned Index,
Index: clang/lib/AST/ExprConstant.cpp
===================================================================
--- clang/lib/AST/ExprConstant.cpp
+++ clang/lib/AST/ExprConstant.cpp
@@ -8778,6 +8778,8 @@
     LLVM_FALLTHROUGH;
   case Builtin::BI__builtin_memcpy:
   case Builtin::BI__builtin_memmove:
+  case Builtin::BI__builtin_memcpy_overloaded:
+  case Builtin::BI__builtin_memmove_overloaded:
   case Builtin::BI__builtin_wmemcpy:
   case Builtin::BI__builtin_wmemmove: {
     bool WChar = BuiltinOp == Builtin::BIwmemcpy ||
@@ -8787,6 +8789,7 @@
     bool Move = BuiltinOp == Builtin::BImemmove ||
                 BuiltinOp == Builtin::BIwmemmove ||
                 BuiltinOp == Builtin::BI__builtin_memmove ||
+                BuiltinOp == Builtin::BI__builtin_memmove_overloaded ||
                 BuiltinOp == Builtin::BI__builtin_wmemmove;
 
     // The result of mem* is the first argument.
@@ -8841,6 +8844,21 @@
       return false;
     }
 
+    if (E->getNumArgs() == 4) {
+      // Overloaded mem* functions have an optional 4th parameter which denotes
+      // atomic element size in bytes. Constexpr interpretation doesn't care
+      // about atomicity, but needs to check runtime constraints on size. We
+      // can't check the alignment runtime constraints.
+      APSInt ElSz;
+      if (!EvaluateInteger(E->getArg(3), ElSz, Info))
+        return false;
+      if (N.urem(ElSz.getLimitedValue()) != 0) {
+        Info.FFDiag(E, diag::note_constexpr_mem_overloaded_bad_size)
+            << (int)N.getLimitedValue() << (int)ElSz.getLimitedValue();
+        return false;
+      }
+    }
+
     // Figure out how many T's we're copying.
     uint64_t TSize = Info.Ctx.getTypeSizeInChars(T).getQuantity();
     if (!WChar) {
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -12204,6 +12204,8 @@
   bool SemaBuiltinSetjmp(CallExpr *TheCall);
   ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult);
   ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult);
+  ExprResult SemaBuiltinMemcpyOverloaded(ExprResult TheCallResult);
+  ExprResult SemaBuiltinMemsetOverloaded(ExprResult TheCallResult);
   ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult,
                                      AtomicExpr::AtomicOp Op);
   ExprResult SemaBuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult,
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7932,6 +7932,8 @@
 
 def warn_call_wrong_number_of_arguments : Warning<
   "too %select{few|many}0 arguments in call to %1">;
+def err_atomic_qualifier_invalid : Error<
+  "parameter cannot have the _Atomic qualifier (%0 invalid)">;
 def err_atomic_builtin_must_be_pointer : Error<
   "address argument to atomic builtin must be a pointer (%0 invalid)">;
 def err_atomic_builtin_must_be_pointer_intptr : Error<
@@ -8931,6 +8933,18 @@
   "null returned from %select{function|method}0 that requires a non-null return value">,
   InGroup<NonNull>;
 
+def err_const_arg : Error<
+  "argument must be non-const, got %0">;
+
+def err_sized_volatile_unsupported : Error<
+  "specifying an access size for volatile memory operations is unsupported "
+  "(%0 is volatile)">;
+def err_elsz_must_be_lock_free : Error<
+  "element size must be a lock-free size, %0 exceeds %1 bytes">;
+def err_atomic_builtin_ext_size_mismatches_el : Error<
+  "number of bytes to copy must be a multiple of pointer element size, "
+  "got %0 bytes to copy with element size %1 for %2">;
+
 def err_lifetimebound_no_object_param : Error<
   "'lifetimebound' attribute cannot be applied; %select{static |non-}0member "
   "function has no implicit object parameter">;
Index: clang/include/clang/Basic/DiagnosticASTKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticASTKinds.td
+++ clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -282,6 +282,9 @@
   "size to copy (%4) is not a multiple of size of element type %3 (%5)|"
   "source is not a contiguous array of at least %4 elements of type %3|"
   "destination is not a contiguous array of at least %4 elements of type %3}2">;
+def note_constexpr_mem_overloaded_bad_size : Note<
+  "size parameter is %0, expected a size that is evenly divisible by "
+  "element size %1">;
 def note_constexpr_bit_cast_unsupported_type : Note<
   "constexpr bit_cast involving type %0 is not yet supported">;
 def note_constexpr_bit_cast_unsupported_bitfield : Note<
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -488,7 +488,6 @@
 BUILTIN(__builtin_memchr, "v*vC*iz", "nF")
 BUILTIN(__builtin_memcmp, "ivC*vC*z", "nF")
 BUILTIN(__builtin_memcpy, "v*v*vC*z", "nF")
-BUILTIN(__builtin_memcpy_inline, "vv*vC*Iz", "nt")
 BUILTIN(__builtin_memmove, "v*v*vC*z", "nF")
 BUILTIN(__builtin_mempcpy, "v*v*vC*z", "nF")
 BUILTIN(__builtin_memset, "v*v*iz", "nF")
@@ -1491,6 +1490,10 @@
 BUILTIN(__builtin_char_memchr, "c*cC*iz", "n")
 BUILTIN(__builtin_dump_struct, "ivC*v*", "tn")
 BUILTIN(__builtin_preserve_access_index, "v.", "t")
+BUILTIN(__builtin_memcpy_inline, "vv*vC*Iz", "nt")
+BUILTIN(__builtin_memcpy_overloaded, "v*v*vC*z", "nt")
+BUILTIN(__builtin_memmove_overloaded, "v*v*vC*z", "nt")
+BUILTIN(__builtin_memset_overloaded, "v*v*iz", "nt")
 
 // Alignment builtins (uses custom parsing to support pointers and integers)
 BUILTIN(__builtin_is_aligned, "bvC*z", "nct")
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -2420,6 +2420,47 @@
 
 Note that the `size` argument must be a compile time constant.
 
+Clang provides versions of the following functions which are overloaded based on
+the pointer parameter types:
+
+* ``__builtin_memcpy_overloaded(QUAL0 T *dst, QUAL1 const U *src, size_t byte_size, size_t byte_element_size = <unspecified>)``
+* ``__builtin_memmove_overloaded(QUAL0 T *dst, QUAL1 const U *src, size_t byte_size, size_t byte_element_size = <unspecified>)``
+* ``__builtin_memset_overloaded(QUAL T *dst, unsigned char val, size_t byte_size, size_t byte_element_size = <unspecified>)``
+
+These overloads support destinations and sources which are a mix of the
+following qualifiers:
+
+* ``volatile``
+* ``restrict``
+* ``__unaligned``
+* non-default address spaces
+
+The ``_Atomic`` qualifier is not supported. Rather, an optional last function
+parameter can be provided to specify element access size in bytes. Element size
+must be a compile-time constant. When the element size is provided, the memory
+will be accessed with a sequence of operations of size equal to or a multiple of
+the requested element size. The order of operations is unspecified, and each
+access has unordered atomic semantics. This means that reads and writes do not
+tear at the individual element level, and they each occur exactly once, but the
+order in which they occur (and in which they are observable) can only be
+guaranteed using appropriate fences around the function call. Element size must
+therefore be a lock-free size for the target architecture. It is a runtime
+constraint violation to provide a memory location which is aligned to less than
+the element size. It is a runtime constraint violation to provide a size which
+is not evenly divided by the specified element size.
+
+When the element size parameter is not provided, the access size is unspecified
+and might be non-uniform throughout the operation.
+
+The builtins can be used as building blocks for different facilities:
+
+* Using ``volatile`` to copy data a single time from untrusted buffers, avoiding
+  Time-of-Check Time-of-Use security issues.
+* Using ``volatile`` to implement memory operations which will not be eliminated
+  by the optimizer, such as C's Annex K ``memset_s``.
+* Implementing an atomic ``memcpy`` using atomic operations of a particular
+  size, similar to that presented in C++ proposal `p1478 <https://wg21.link/p1478>`_.
+
 Clang provides constant expression evaluation support for builtin forms of the
 following functions from the C standard library headers
 ``<string.h>`` and ``<wchar.h>``:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to