[llvm] [AMDGPU] Support v_lshl_add_u64 with non-constant shift amount (PR #179904)
Frederik Harwath via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 5 02:36:35 PST 2026
https://github.com/frederik-h created https://github.com/llvm/llvm-project/pull/179904
This patch replaces the constant-only check in the `shl_0_to_4` PatFrag (used to select `v_lshl_add_u64`) with a KnownBits-based bound, in both the SelectionDAG predicate and the GlobalISel predicate code. This allows the instruction to be selected when the shift amount is a non-constant value that is provably at most 4 (e.g. after an `and` with a small mask or a `urem` by a power of two), as exercised by the new tests in `lshl-add-u64.ll`.
>From f560b185c0ff075832983962a831e1df55683095 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Thu, 5 Feb 2026 02:31:42 -0500
Subject: [PATCH] [AMDGPU] Support v_lshl_add_u64 with non-constant shift
amount
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 18 +++-----
llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll | 50 ++++++++++++++++++++++
2 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 56127c7e2f48f..286ecc3bd294d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -638,19 +638,13 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDA
}
def shl_0_to_4 : PatFrag<
- (ops node:$src0, node:$src1), (shl node:$src0, node:$src1),
- [{
- if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- return C->getZExtValue() <= 4;
- }
- return false;
- }]> {
+ (ops node:$src0, node:$src1), (shl node:$src0, node:$src1), [{
+ KnownBits KB = CurDAG->computeKnownBits(N->getOperand(1));
+ return KB.getMaxValue().getZExtValue() <= 4;
+ }]> {
let GISelPredicateCode = [{
- int64_t Imm = 0;
- if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Imm)) &&
- !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm))))
- return false;
- return (uint64_t)Imm <= 4;
+ KnownBits KB = VT->getKnownBits(MI.getOperand(2).getReg());
+ return KB.getMaxValue().getZExtValue() <= 4;
}];
}
diff --git a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
index 247a0a9a64b33..d4b99b3d0f98a 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
@@ -116,3 +116,53 @@ define i32 @lshl_add_u64_gep(ptr %p, i64 %a) {
%v = load i32, ptr %gep
ret i32 %v
}
+
+define i64 @lshl_add_u64_vvv_and_2(i64 %v, i64 %a, i64 %s) {
+; GCN-LABEL: lshl_add_u64_vvv_and_2:
+; GCN: v_and_b32_e32 [[AND:v[0-9:]+]], 2, v{{[0-9:]+}}
+; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[AND]], v[{{[0-9:]+}}]
+ %and = and i64 %s, 2
+ %shl = shl i64 %v, %and
+ %add = add i64 %shl, %a
+ ret i64 %add
+}
+
+define i64 @lshl_add_u64_vvv_and_4(i64 %v, i64 %a, i64 %s) {
+; GCN-LABEL: lshl_add_u64_vvv_and_4:
+; GCN: v_and_b32_e32 [[AND:v[0-9:]+]], 4, v{{[0-9:]+}}
+; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[AND]], v[{{[0-9:]+}}]
+ %and = and i64 %s, 4
+ %shl = shl i64 %v, %and
+ %add = add i64 %shl, %a
+ ret i64 %add
+}
+
+define i64 @lshl_add_u64_vvv_and_5(i64 %v, i64 %a, i64 %s) {
+; GCN-LABEL: lshl_add_u64_vvv_and_5:
+; GFX942: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0, v[{{[0-9:]+}}]
+; GFX1250: v_add_nc_u64_e32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
+ %and = and i64 %s, 5
+ %shl = shl i64 %v, %and
+ %add = add i64 %shl, %a
+ ret i64 %add
+}
+
+define i64 @lshl_add_u64_vvv_urem(i64 %v, i64 %a, i64 %s) {
+; GCN-LABEL: lshl_add_u64_vvv_urem:
+; GCN: v_and_b32_e32 [[AND:v[0-9:]+]], 3, v{{[0-9:]+}}
+; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[AND]], v[{{[0-9:]+}}]
+ %urem = urem i64 %s, 4
+ %shl = shl i64 %v, %urem
+ %add = add i64 %shl, %a
+ ret i64 %add
+}
+
+define i64 @lshl_add_u64_vvv_srem(i64 %v, i64 %a, i64 %s) {
+; GCN-LABEL: lshl_add_u64_vvv_srem:
+; GFX942: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0, v[{{[0-9:]+}}]
+; GFX1250: v_add_nc_u64_e32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
+ %srem = srem i64 %s, 4
+ %shl = shl i64 %v, %srem
+ %add = add i64 %shl, %a
+ ret i64 %add
+}
More information about the llvm-commits
mailing list