[PATCH] D110231: [AMDGPU] Add constrained shift pattern matches.
Abinav Puthan Purayil via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 22 04:27:41 PDT 2021
abinavpp created this revision.
abinavpp added reviewers: arsenm, foad, rampitec.
Herald added subscribers: kerbowa, hiraditya, t-tye, Anastasia, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
abinavpp requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
This patch is motivated by clang's conformance to
https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_C.html#operators-shift
which makes clang emit (<shift> a, (and b, <width> - 1)) for `a <shift> b`
in OpenCL, where a is an int of bit width <width>.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D110231
Files:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
llvm/test/CodeGen/AMDGPU/shift-opts.ll
Index: llvm/test/CodeGen/AMDGPU/shift-opts.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/shift-opts.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx908 < %s | FileCheck %s
+
+define i32 @constrained_shift(i32 %a, i32 %b) {
+; CHECK-LABEL: constrained_shift:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, v1, v0
+; CHECK-NEXT: v_lshrrev_b32_e32 v3, v1, v0
+; CHECK-NEXT: v_ashrrev_i32_e32 v0, v1, v0
+; CHECK-NEXT: v_add3_u32 v0, v2, v3, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %and = and i32 %b, 31
+ %shl = shl i32 %a, %and
+ %lshr = lshr i32 %a, %and
+ %ashr = ashr i32 %a, %and
+ %ret.0 = add i32 %shl, %lshr
+ %ret = add i32 %ret.0, %ashr
+ ret i32 %ret
+}
Index: llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
+++ llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
@@ -245,8 +245,7 @@
}
; GCN-LABEL: {{^}}trunc_shl_and31:
-; GCN: s_and_b32 s[[AMT:[0-9]+]], s{{[0-9]+}}, 31
-; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s[[AMT]], v{{[0-9]+}}
+; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-NOT: v_lshl_b64
; GCN-NOT: v_lshlrev_b64
define amdgpu_kernel void @trunc_shl_and31(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2510,6 +2510,13 @@
let SubtargetPredicate = NotHasAddNoCarryInsts;
}
+// Eliminate `and` for constrained shift since the target shift instructions are
+// constrained.
+class ConstrainedI32ShiftPat<SDNode src, InstSI dst>
+ : AMDGPUPat <(src i32:$a, (and i32:$b, 31)), (dst $b, $a)>;
+def : ConstrainedI32ShiftPat<shl, V_LSHLREV_B32_e64>;
+def : ConstrainedI32ShiftPat<srl, V_LSHRREV_B32_e64>;
+def : ConstrainedI32ShiftPat<sra, V_ASHRREV_I32_e64>;
// Avoid pointlessly materializing a constant in VGPR.
// FIXME: Should also do this for readlane, but tablegen crashes on
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D110231.374181.patch
Type: text/x-patch
Size: 2350 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210922/1b69fd5a/attachment.bin>
More information about the llvm-commits
mailing list