[PATCH] D150246: AMDGPU: Fix issue in shl(or) combine
Ruiling, Song via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri May 12 04:51:37 PDT 2023
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG60d9010aaf0f: AMDGPU: Fix issue in shl(or) combine (authored by ruiling).
Changed prior to commit:
https://reviews.llvm.org/D150246?vs=521303&id=521606#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D150246/new/
https://reviews.llvm.org/D150246
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
Index: llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -410,15 +410,12 @@
ret void
}
-; FIXME: This or should fold into an offset on the write
; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_lds:
-; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
-; GCN: v_or_b32_e32 [[SCALE1:v[0-9]+]], 32, [[SCALE0]]
-; GCN: v_lshlrev_b32_e32 [[SCALE2:v[0-9]+]], 4, v0
-; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}}
-; GCN: ds_write_b32 [[SCALE2]], v{{[0-9]+}} offset:64
+; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8
+; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
- %idx.add = or i32 %idx, 4
+ %idx.shl = shl i32 %idx, 1
+ %idx.add = or i32 %idx.shl, 1
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
%ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
@@ -427,15 +424,14 @@
store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
-
-; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_max_lds_offset:
-; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
-; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
-; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
-; GCN-DAG: v_or_b32_e32 [[ADD1:v[0-9]+]], 0x1fff0, [[SCALE1]]
-; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
-define void @shl_or_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
- %idx.add = or i32 %idx, 8191
+; GCN-LABEL: {{^}}shl_or_ptr_not_combine_2use_lds:
+; GCN: v_or_b32_e32 [[OR:v[0-9]+]], 1, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, [[OR]]
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, [[OR]]
+; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}}{{$}}
+; GCN-DAG: ds_write_b32 [[SCALE1]], v{{[0-9]+}}{{$}}
+define void @shl_or_ptr_not_combine_2use_lds(i32 %idx) #0 {
+ %idx.add = or i32 %idx, 1
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
%ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9550,6 +9550,8 @@
}
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+// (shl (or x, c1), c2) -> add (shl x, c2), (shl c1, c2) iff x and c1 share no
+// bits
// This is a variant of
// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
@@ -9584,8 +9586,14 @@
if (!CAdd)
return SDValue();
- // If the resulting offset is too large, we can't fold it into the addressing
- // mode offset.
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (N0->getOpcode() == ISD::OR &&
+ !DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1)))
+ return SDValue();
+
+ // If the resulting offset is too large, we can't fold it into the
+ // addressing mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
Type *Ty = MemVT.getTypeForEVT(*DCI.DAG.getContext());
@@ -9595,7 +9603,6 @@
if (!isLegalAddressingMode(DCI.DAG.getDataLayout(), AM, Ty, AddrSpace))
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
EVT VT = N->getValueType(0);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D150246.521606.patch
Type: text/x-patch
Size: 3272 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230512/54e4b440/attachment.bin>
More information about the llvm-commits mailing list