[clang] [clang-tools-extra] [llvm] [AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on GFX12 (PR #77929)

Wed Jan 17 06:58:47 PST 2024

https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/77929

>From 4299ba898449f782c642b0c27f0ec9970aee0a1c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 12 Jan 2024 11:34:02 +0000
Subject: [PATCH 1/2] [AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on
 GFX12

---
 llvm/lib/Target/AMDGPU/AMDGPU.td                |  3 ++-
 llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir  |  1 +
 llvm/test/MC/AMDGPU/gfx12_asm_features.s        | 17 +++++++++++++++++
 .../Disassembler/AMDGPU/gfx12_dasm_features.txt | 13 +++++++++++++
 4 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b27edb1e9e14bb..682ca6c57c973b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1502,7 +1502,8 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureHasRestrictedSOffset,
    FeatureVGPRSingleUseHintInsts,
    FeatureMADIntraFwdBug,
-   FeatureScalarDwordx3Loads]>;
+   FeatureScalarDwordx3Loads,
+   FeatureDPPSrc1SGPR]>;
 
 //===----------------------------------------------------------------------===//
 
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fe1345e29f133d..7d081a1491da6e 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,5 +1,6 @@
 # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
 # RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
index 7e58bdb3b444e1..da4464c6494dbf 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
@@ -1,5 +1,22 @@
 // RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
 
+//
+// Subtargets allow src1 of VOP3 DPP instructions to be SGPR or inlinable
+// constant.
+//
+
+v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
+
 //
 // Elements of CPol operand can be given in any order
 //
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt
new file mode 100644
index 00000000000000..2c64522422ad0d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+
+# GFX12: v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff
+
+# GFX12: v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff
+
+# GFX12: v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05
+
+# GFX12: v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05

>From a65834ad3d8aed3e9cb1414d7576d5244a31f8a2 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 17 Jan 2024 14:39:09 +0000
Subject: [PATCH 2/2] More tests

---
 llvm/test/MC/AMDGPU/gfx1150_asm_features.s                 | 6 ++++++
 llvm/test/MC/AMDGPU/gfx12_asm_features.s                   | 6 ++++++
 llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt | 6 ++++++
 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt   | 6 ++++++
 4 files changed, 24 insertions(+)

diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
index a4904c40b40ae7..55c855175a89e0 100644
--- a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
@@ -17,3 +17,9 @@ v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0]
 
 v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
index da4464c6494dbf..d0540d2199b120 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
@@ -17,6 +17,12 @@ v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0]
 v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
 
+v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
+
+v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
 //
 // Elements of CPol operand can be given in any order
 //
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt
index 77161a49364b0a..5ed9a673db2e69 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt
@@ -11,3 +11,9 @@
 
 # GFX1150: v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
 0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05
+
+# GFX1150: v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05
+
+# GFX1150: v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt
index 2c64522422ad0d..4ea6d8b7e27fb3 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt
@@ -11,3 +11,9 @@
 
 # GFX12: v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
 0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05
+
+# GFX1150: v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05
+
+# GFX1150: v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05