[PATCH] D135869: [AMDGPU][DAG] Only apply trunc/shift combine to 16 bit types

Thu Oct 13 04:58:17 PDT 2022

Pierre-vh created this revision.
Pierre-vh added a reviewer: arsenm.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
Pierre-vh requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

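Previously, the trunc/shift combine was guarded by a `< 32` check on the result type's scalar bit width, probably on the assumption that anything below 32 bits would be 16 bits.
However, odd integer types like i26 exist and are legal, so only apply the combine when the result type is exactly 16 bits wide.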


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135869

Files:
  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll


Index: llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
+++ llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
@@ -149,3 +149,26 @@
   %trunc = trunc i64 %shift to i16
   ret i16 %trunc
 }
+
+; Checks that we don't blindly apply the combine on anything <32.
+; It's completely possible to trunc to weird integer types like i26
+; as an intermediate step of a bigger computation.
+;
+; Thus, we should have an alignbit here and not a lshrrev
+define i32 @trunc_srl_i64_25_to_i26(i64 %x) {
+; GCN-LABEL: trunc_srl_i64_25_to_i26:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, 0xa000000, v0
+; GCN-NEXT:    v_alignbit_b32 v0, 0, v0, 25
+; GCN-NEXT:    v_add_u32_e32 v0, 55, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %value.knownbits2 = and i64 %x, 167772160 ; 0xA000000
+  %shift = lshr i64 %value.knownbits2, 25
+  %trunc = trunc i64 %shift to i26
+  %add = add i26 %trunc, 55
+  %ext = zext i26 %add to i32
+  ret i32 %ext
+}
+
+
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3237,7 +3237,7 @@
   //
   // i16 (trunc (srl i64:x, K)), K <= 16 ->
   //     i16 (trunc (srl (i32 (trunc x), K)))
-  if (VT.getScalarSizeInBits() < 32) {
+  if (VT.getScalarSizeInBits() == 16) {
     EVT SrcVT = Src.getValueType();
     if (SrcVT.getScalarSizeInBits() > 32 &&
         (Src.getOpcode() == ISD::SRL ||


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D135869.467446.patch
Type: text/x-patch
Size: 1660 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221013/865c7ebf/attachment.bin>