[PATCH] D124536: [AMDGPU] Add gfx11 subtarget ELF definition

2022-04-27 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash created this revision.
Herald added subscribers: hsmhsm, foad, dexonsmith, kerbowa, rupprecht, 
hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, emaste, 
arsenm.
Herald added a reviewer: jhenderson.
Herald added a project: All.
Joe_Nash requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, MaskRay, wdng.
Herald added projects: clang, LLVM.

This is the first patch of a series to upstream support for the new
subtarget.

Contributors:
Jay Foad 
Konstantin Zhuravlyov 

Patch 1/N for upstreaming AMDGPU gfx11 architectures.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124536

Files:
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/include/llvm/BinaryFormat/ELF.h
  llvm/include/llvm/Support/TargetParser.h
  llvm/lib/Object/ELFObjectFile.cpp
  llvm/lib/ObjectYAML/ELFYAML.cpp
  llvm/lib/Support/TargetParser.cpp
  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
  llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
  llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
  llvm/tools/llvm-readobj/ELFDumper.cpp

Index: llvm/tools/llvm-readobj/ELFDumper.cpp
===
--- llvm/tools/llvm-readobj/ELFDumper.cpp
+++ llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1539,6 +1539,10 @@
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
 };
@@ -1595,6 +1599,10 @@
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
Index: llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
===
--- llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
+++ llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
@@ -337,6 +337,42 @@
 # RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
 
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_M

[PATCH] D124537: [AMDGPU][clang] Definition of gfx11 subtarget

2022-04-27 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash created this revision.
Herald added subscribers: mattd, gchakrabarti, asavonic, dexonsmith, kerbowa, 
t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl, jholewinski.
Herald added a project: All.
Joe_Nash requested review of this revision.
Herald added subscribers: cfe-commits, wdng.
Herald added a project: clang.

Contributors:
Jay Foad 
Konstantin Zhuravlyov 

Patch 2/N for upstreaming of AMDGPU gfx11 architecture

Depends on D124536 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124537

Files:
  clang/include/clang/Basic/Cuda.h
  clang/lib/Basic/Cuda.cpp
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/Driver/amdgpu-macros.cl
  clang/test/Driver/amdgpu-mcpu.cl
  clang/test/Misc/target-invalid-cpu-note.c

Index: clang/test/Misc/target-invalid-cpu-note.c
===
--- clang/test/Misc/target-invalid-cpu-note.c
+++ clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
 
 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
 
 // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
 // R600: error: unknown target CPU 'not-a-cpu'
Index: clang/test/Driver/amdgpu-mcpu.cl
===
--- clang/test/Driver/amdgpu-mcpu.cl
+++ clang/test/Driver/amdgpu-mcpu.cl
@@ -104,6 +104,10 @@
 // RUN: %clang -### -target amdgcn -mcpu=gfx1034 %s 2>&1 | FileCheck --check-prefix=GFX1034 %s
 // RUN: %clang -### -target amdgcn -mcpu=gfx1035 %s 2>&1 | FileCheck --check-prefix=GFX1035 %s
 // RUN: %clang -### -target amdgcn -mcpu=gfx1036 %s 2>&1 | FileCheck --check-prefix=GFX1036 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefix=GFX1100 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1101 %s 2>&1 | FileCheck --check-prefix=GFX1101 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1102 %s 2>&1 | FileCheck --check-prefix=GFX1102 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefix=GFX1103 %s
 
 // GCNDEFAULT-NOT: -target-cpu
 // GFX600:"-target-cpu" "gfx600"
@@ -140,3 +144,7 @@
 // GFX1034:   "-target-cpu" "gfx1034"
 // GFX1035:   "-target-cpu" "gfx1035"
 // GFX1036:   "-target-cpu" "gfx1036"
+// GFX1100:   "-target-cpu" "gfx1100"
+// GFX1101:   "-target-cpu" "gfx1101"
+// GFX1102:   "-target-cpu" "gfx1102"
+// GFX1103:   "-target-cpu" "gfx1103"
Index: clang/test/Driver/amdgpu-macros.cl
===
--- clang/test/Driver/amdgpu-macros.cl
+++ clang/test/Driver/amdgpu-macros.cl
@@ -120,6 +120,10 @@
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1034 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1034
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1035 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1035
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1036 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1036
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1100
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1101 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1101
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1102 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1102
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1103
 
 // ARCH-GCN-DAG: #define FP_FAST_FMA 1
 
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
=

[PATCH] D124536: [AMDGPU] Add gfx11 subtarget ELF definition

2022-04-28 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash updated this revision to Diff 425802.
Joe_Nash added a comment.

add requested docs


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124536/new/

https://reviews.llvm.org/D124536

Files:
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/docs/AMDGPUUsage.rst
  llvm/include/llvm/BinaryFormat/ELF.h
  llvm/include/llvm/Support/TargetParser.h
  llvm/lib/Object/ELFObjectFile.cpp
  llvm/lib/ObjectYAML/ELFYAML.cpp
  llvm/lib/Support/TargetParser.cpp
  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
  llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
  llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
  llvm/tools/llvm-readobj/ELFDumper.cpp

Index: llvm/tools/llvm-readobj/ELFDumper.cpp
===
--- llvm/tools/llvm-readobj/ELFDumper.cpp
+++ llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1539,6 +1539,10 @@
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
 };
@@ -1595,6 +1599,10 @@
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
Index: llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
===
--- llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
+++ llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
@@ -337,6 +337,42 @@
 # RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
 
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102 -DFLAG_VALUE=0x47
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102 -DFLAG_VALUE=0x47
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX

[PATCH] D124536: [AMDGPU] Add gfx11 subtarget ELF definition

2022-04-29 Thread Joe Nash via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG813e521e55b1: [AMDGPU] Add gfx11 subtarget ELF definition 
(authored by Joe_Nash).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124536/new/

https://reviews.llvm.org/D124536

Files:
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/docs/AMDGPUUsage.rst
  llvm/include/llvm/BinaryFormat/ELF.h
  llvm/include/llvm/Support/TargetParser.h
  llvm/lib/Object/ELFObjectFile.cpp
  llvm/lib/ObjectYAML/ELFYAML.cpp
  llvm/lib/Support/TargetParser.cpp
  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
  llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
  llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
  llvm/tools/llvm-readobj/ELFDumper.cpp

Index: llvm/tools/llvm-readobj/ELFDumper.cpp
===
--- llvm/tools/llvm-readobj/ELFDumper.cpp
+++ llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1539,6 +1539,10 @@
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
 };
@@ -1595,6 +1599,10 @@
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
Index: llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
===
--- llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
+++ llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
@@ -337,6 +337,42 @@
 # RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
 
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1101 -DFLAG_VALUE=0x46
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102 -DFLAG_VALUE=0x47
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1102
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DF

[PATCH] D124537: [AMDGPU][clang] Definition of gfx11 subtarget

2022-04-29 Thread Joe Nash via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8bdfc73f633d: [AMDGPU][clang] Definition of gfx11 subtarget 
(authored by Joe_Nash).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124537/new/

https://reviews.llvm.org/D124537

Files:
  clang/include/clang/Basic/Cuda.h
  clang/lib/Basic/Cuda.cpp
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/Driver/amdgpu-macros.cl
  clang/test/Driver/amdgpu-mcpu.cl
  clang/test/Misc/target-invalid-cpu-note.c

Index: clang/test/Misc/target-invalid-cpu-note.c
===
--- clang/test/Misc/target-invalid-cpu-note.c
+++ clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
 
 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
 
 // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
 // R600: error: unknown target CPU 'not-a-cpu'
Index: clang/test/Driver/amdgpu-mcpu.cl
===
--- clang/test/Driver/amdgpu-mcpu.cl
+++ clang/test/Driver/amdgpu-mcpu.cl
@@ -104,6 +104,10 @@
 // RUN: %clang -### -target amdgcn -mcpu=gfx1034 %s 2>&1 | FileCheck --check-prefix=GFX1034 %s
 // RUN: %clang -### -target amdgcn -mcpu=gfx1035 %s 2>&1 | FileCheck --check-prefix=GFX1035 %s
 // RUN: %clang -### -target amdgcn -mcpu=gfx1036 %s 2>&1 | FileCheck --check-prefix=GFX1036 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefix=GFX1100 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1101 %s 2>&1 | FileCheck --check-prefix=GFX1101 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1102 %s 2>&1 | FileCheck --check-prefix=GFX1102 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefix=GFX1103 %s
 
 // GCNDEFAULT-NOT: -target-cpu
 // GFX600:"-target-cpu" "gfx600"
@@ -140,3 +144,7 @@
 // GFX1034:   "-target-cpu" "gfx1034"
 // GFX1035:   "-target-cpu" "gfx1035"
 // GFX1036:   "-target-cpu" "gfx1036"
+// GFX1100:   "-target-cpu" "gfx1100"
+// GFX1101:   "-target-cpu" "gfx1101"
+// GFX1102:   "-target-cpu" "gfx1102"
+// GFX1103:   "-target-cpu" "gfx1103"
Index: clang/test/Driver/amdgpu-macros.cl
===
--- clang/test/Driver/amdgpu-macros.cl
+++ clang/test/Driver/amdgpu-macros.cl
@@ -120,6 +120,10 @@
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1034 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1034
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1035 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1035
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1036 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1036
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1100
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1101 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1101
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1102 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1102
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1103
 
 // ARCH-GCN-DAG: #define FP_FAST_FMA 1
 
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
===
--- clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -37,6 +37,10 @@
 // RUN: %clang_

[PATCH] D138868: AMDGPU/clang: Remove target features from address space test builtins

2022-12-29 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash added inline comments.



Comment at: clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl:8
+// be initialized to something useful. The proper way to diagnose invalid flat
+// usage is to forbid flat pointers on unsupported targets.
+

What part of the toolchain is responsible for forbidding flat pointers on 
unsupported targets?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138868/new/

https://reviews.llvm.org/D138868

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D138868: AMDGPU/clang: Remove target features from address space test builtins

2022-12-29 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash accepted this revision.
Joe_Nash added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl:8
+// be initialized to something useful. The proper way to diagnose invalid flat
+// usage is to forbid flat pointers on unsupported targets.
+

arsenm wrote:
> Joe_Nash wrote:
> > What part of the toolchain is responsible for forbidding flat pointers on 
> > unsupported targets?
> The frontend should just error on languages with flat pointers. Really we 
> should just implement software flat pointers, it wouldn’t be difficult and 
> might only require minimal driver cooperation if any 
Ok, I just want to ask if that is in place or should be in place before this 
patch lands, so we don't accidentally convert a compile-time error into a 
runtime error. Otherwise LGTM.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138868/new/

https://reviews.llvm.org/D138868

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D138870: clang/AMDGPU: Remove flat-address-space from feature map

2022-12-30 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash added a comment.

The code looks fine, but as you say, the change is visible in user code and 
could break something. Do you want to handle that somehow? Maybe wait for 
@b-sumner?




Comment at: clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp:17
 #pragma omp metadirective \
-when(device = {isa("flat-address-space")} \
+when(device = {isa("dpp")} \
  : parallel) default(single)

Was the "dpp" attribute chosen arbitrarily?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138870/new/

https://reviews.llvm.org/D138870

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D138870: clang/AMDGPU: Remove flat-address-space from feature map

2023-01-05 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash accepted this revision.
Joe_Nash added a comment.
This revision is now accepted and ready to land.

In D138870#4020211, @arsenm wrote:

> In D138870#4020204, @Joe_Nash 
> wrote:
>
>> The code looks fine, but as you say, the change is visible in user code 
>> and could break something. Do you want to handle that somehow? Maybe wait 
>> for @b-sumner
>
> OpenMP assumes flat pointers all over, so if someone was relying on this 
> behavior it wasn't doing anything useful. They had dead code

I can't verify the idea that users of the behavior had dead code, but assuming 
that's right, LGTM.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138870/new/

https://reviews.llvm.org/D138870

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127904: [AMDGPU] gfx11 new dot instruction codegen support

2022-06-15 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash created this revision.
Joe_Nash added reviewers: foad, rampitec.
Herald added subscribers: kosarev, jsilvanus, hsmhsm, kerbowa, hiraditya, 
t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
Herald added a project: All.
Joe_Nash requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, wdng.
Herald added projects: clang, LLVM.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D127904

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
  llvm/include/llvm/IR/IntrinsicsAMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPUGISel.td
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
  llvm/lib/Target/AMDGPU/VOP3Instructions.td
  llvm/lib/Target/AMDGPU/VOP3PInstructions.td
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.bf16.bf16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.f16.f16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.f32.bf16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll

Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
+
+declare i32 @llvm.amdgcn.sudot8(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
+
+define i32 @test_llvm_amdgcn_sudot8_uu(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 0, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_us(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_us:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 1, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_su(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_su:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 1, i32 %a, i1 0, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_ss(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 1, i32 %a, i1 1, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+
+
+define i32 @test_llvm_amdgcn_sudot8_uu_clamp(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0x

[PATCH] D127904: [AMDGPU] gfx11 new dot instruction codegen support

2022-06-16 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash updated this revision to Diff 437552.
Joe_Nash marked 3 inline comments as done.
Joe_Nash added a comment.

Added builtin positive tests, removed clamp from the intrinsic comments, and
combined the GlobalISel tests with the normal codegen tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127904/new/

https://reviews.llvm.org/D127904

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
  llvm/include/llvm/IR/IntrinsicsAMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPUGISel.td
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
  llvm/lib/Target/AMDGPU/VOP3Instructions.td
  llvm/lib/Target/AMDGPU/VOP3PInstructions.td
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll

Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
+
+declare i32 @llvm.amdgcn.sudot8(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
+
+define i32 @test_llvm_amdgcn_sudot8_uu(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 0, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_us(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_us:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 1, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_su(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_su:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 1, i32 %a, i1 0, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_ss(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 1, i32 %a, i1 1, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+
+
+define i32 @test_llvm_amdgcn_sudot8_uu_clamp(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x1c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 0, i32 %b, i32 %c, i1 1)
+  ret i32 %ret
+}
+
+define i32 @tes

[PATCH] D127904: [AMDGPU] gfx11 new dot instruction codegen support

2022-06-16 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsAMDGPU.td:1926
 
+// f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c, i1 %clamp)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c

rampitec wrote:
> I do not see clamp in the definition. Make a separate comment for the last 2?
I removed clamp from the comments.



Comment at: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.f16.f16.ll:2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
+

arsenm wrote:
> Is there actually a reason to split the globalisel and DAG tests? I expect 
> tests this simple to be identical
Ok, I have combined the tests and they are identical.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127904/new/

https://reviews.llvm.org/D127904

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127904: [AMDGPU] gfx11 new dot instruction codegen support

2022-06-16 Thread Joe Nash via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2d43de13df03: [AMDGPU] gfx11 new dot instruction codegen support (authored by Joe_Nash).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127904/new/

https://reviews.llvm.org/D127904

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
  llvm/include/llvm/IR/IntrinsicsAMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPUGISel.td
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
  llvm/lib/Target/AMDGPU/VOP3Instructions.td
  llvm/lib/Target/AMDGPU/VOP3PInstructions.td
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll

Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
+
+declare i32 @llvm.amdgcn.sudot8(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
+
+define i32 @test_llvm_amdgcn_sudot8_uu(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 0, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_us(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_us:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 1, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_su(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_su:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 1, i32 %a, i1 0, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_llvm_amdgcn_sudot8_ss(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 1, i32 %a, i1 1, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+
+
+define i32 @test_llvm_amdgcn_sudot8_uu_clamp(i32 %a, i32 %b, i32 %c) {
+; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT:v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x1c]
+; GFX11-NEXT:s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+entry:
+  %ret = call i32 @llvm.amdgcn.sudot8(i1 0, i32 %a, i1 0, i32 %b, i32 %c, i1 1)
+  ret i32 %ret
+}
+
+defi

[PATCH] D138216: [AMDGPU] Intrinsic to expose s_wait_event for export ready

2022-11-23 Thread Joe Nash via Phabricator via cfe-commits
Joe_Nash accepted this revision.
Joe_Nash added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138216/new/

https://reviews.llvm.org/D138216

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits