[llvm] 824dd81 - [AMDGPU][DAG] Fix trunc/shift combine condition

Thu Oct 20 23:36:14 PDT 2022

Author: Pierre van Houtryve
Date: 2022-10-21T06:36:07Z
New Revision: 824dd811be421cd946f64c25eb8ef3ac47eb19f2

URL: https://github.com/llvm/llvm-project/commit/824dd811be421cd946f64c25eb8ef3ac47eb19f2
DIFF: https://github.com/llvm/llvm-project/commit/824dd811be421cd946f64c25eb8ef3ac47eb19f2.diff

LOG: [AMDGPU][DAG] Fix trunc/shift combine condition

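The combine condition must be stricter for right shifts than for left shifts: a left shift can be narrowed to i32 whenever the shift amount is at most 31, but a right shift can only be narrowed when the shift amount is at most (32 - Size), where Size is the bit width of the truncated type. Otherwise, information stored in the high bits of the source is lost when truncating.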

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D136059

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 296da2add8a34..d3e65516a526a 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3245,9 +3245,14 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
          Src.getOpcode() == ISD::SHL)) {
       SDValue Amt = Src.getOperand(1);
       KnownBits Known = DAG.computeKnownBits(Amt);
-      unsigned Size = VT.getScalarSizeInBits();
-      if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
-          (Known.countMaxActiveBits() <= Log2_32(Size))) {
+
+      // - For left shifts, do the transform as long as the shift
+      //   amount is still legal for i32, so when ShiftAmt < 32 (<= 31)
+      // - For right shift, do it if ShiftAmt <= (32 - Size) to avoid
+      //   losing information stored in the high bits when truncating.
+      const unsigned MaxCstSize =
+          (Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits());
+      if (Known.getMaxValue().ule(MaxCstSize)) {
         EVT MidVT = VT.isVector() ?
           EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                            VT.getVectorNumElements()) : MVT::i32;

diff  --git a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
index 8e17181d3b946..e50c7c592f887 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
@@ -128,8 +128,8 @@ define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) {
 ; GCN-LABEL: trunc_srl_i64_var_mask16_to_i16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_and_b32_e32 v2, 16, v2
-; GCN-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT:    v_and_b32_e32 v1, 16, v2
+; GCN-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %amt.masked = and i64 %amt, 16
   %shift = lshr i64 %x, %amt.masked
@@ -149,3 +149,19 @@ define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) {
   %trunc = trunc i64 %shift to i16
   ret i16 %trunc
 }
+
+define i32 @trunc_srl_i64_25_to_i26(i64 %x) {
+; GCN-LABEL: trunc_srl_i64_25_to_i26:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, 0xa000000, v0
+; GCN-NEXT:    v_alignbit_b32 v0, 0, v0, 25
+; GCN-NEXT:    v_add_u32_e32 v0, 55, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %value.knownbits2 = and i64 %x, 167772160 ; 0xA000000
+  %shift = lshr i64 %value.knownbits2, 25
+  %trunc = trunc i64 %shift to i26
+  %add = add i26 %trunc, 55
+  %ext = zext i26 %add to i32
+  ret i32 %ext
+}