[llvm] d06c2ef - [AMDGPU] Support v_lshl_add_u64 in gfx1250 (#145591)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 15:49:04 PDT 2025
Author: Stanislav Mekhanoshin
Date: 2025-06-24T15:49:01-07:00
New Revision: d06c2efd67e54193017606cba315facbd50167a4
URL: https://github.com/llvm/llvm-project/commit/d06c2efd67e54193017606cba315facbd50167a4
DIFF: https://github.com/llvm/llvm-project/commit/d06c2efd67e54193017606cba315facbd50167a4.diff
LOG: [AMDGPU] Support v_lshl_add_u64 in gfx1250 (#145591)
It also brings in some DPP changes needed to define it.
Added:
llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
Modified:
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 07a4292ef28bc..27b3d6bc9440c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -661,6 +661,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
break;
+ // FIXME: DecoderTableGFX125064 is not defined yet.
+ if (isGFX1250() &&
+ tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
+ break;
+
if (isGFX12() &&
tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 89a9ecc27c6ed..9ed054449c264 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -32,6 +32,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
let HasExtDPP = 0;
}
+let HasExt64BitDPP = 1 in {
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
@@ -48,10 +49,13 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
let HasExtDPP = 0;
}
+def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
+
def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
let HasExtVOP3DPP = 0;
let HasExtDPP = 0;
}
+} // End HasExt64BitDPP = 1;
//===----------------------------------------------------------------------===//
// VOP3 INTERP
@@ -722,7 +726,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
// src0 is shifted left by 0-4 (use “0” to get ADD_U64).
let SubtargetPredicate = HasLshlAddU64Inst in
-defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
+defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", V_LSHL_ADD_U64_PROF>;
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
SchedRW = [WriteFloatCvt] in {
@@ -1889,6 +1893,9 @@ let AssemblerPredicate = isGFX11Plus in {
def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
}
+// These instructions differ from GFX12 variant by supporting DPP:
+defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 19d490465f163..0b64b504466c8 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1552,12 +1552,17 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
""));
}
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
+ list<Predicate> predicates = []> {
def _e64 : VOP3InstBase<OpName, P, node>;
- let SubtargetPredicate = isGFX11Plus in {
- if P.HasExtVOP3DPP then
- def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
- } // end SubtargetPredicate = isGFX11Plus
+ if P.HasExtVOP3DPP then
+ def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
+ let SubtargetPredicate = isGFX11Plus;
+ }
+ else if P.HasExt64BitDPP then
+ def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
+ let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
+ }
}
class UniformUnaryFragOrOp<SDPatternOperator Op> {
@@ -1961,6 +1966,17 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
+multiclass VOP3Only_Realtriple_with_name_gfx12_not_gfx1250<bits<10> op, string opName,
+ string asmName, string pseudo_mnemonic = "",
+ bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Not12_50Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
+ VOP3_Real_Base<GFX1250Gen, op, NAME, 1/*IsSingle*/>;
+
+multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
+ VOP3_Realtriple<GFX1250Gen, op, isSingle>;
+
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
new file mode 100644
index 0000000000000..0070c8ab9ee78
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
new file mode 100644
index 0000000000000..553eacc8e7b61
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
new file mode 100644
index 0000000000000..e2fafe415ff7f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX125X-ERR,GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}} ^
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}} ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
new file mode 100644
index 0000000000000..d9d8f60fe3d17
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+
+0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GFX1250-FAKE16: {{.*}}
+# GFX1250-REAL16: {{.*}}
More information about the llvm-commits
mailing list