[llvm] 253f804 - [amdgpu] Update med3 combine to skip i64

Thu Mar 18 08:56:52 PDT 2021

Author: Jon Chesterfield
Date: 2021-03-18T15:56:41Z
New Revision: 253f804debb3424470b2ed27f3c812ead908d4ca

URL: https://github.com/llvm/llvm-project/commit/253f804debb3424470b2ed27f3c812ead908d4ca
DIFF: https://github.com/llvm/llvm-project/commit/253f804debb3424470b2ed27f3c812ead908d4ca.diff

LOG: [amdgpu] Update med3 combine to skip i64

[amdgpu] Update med3 combine to skip i64

Fixes an assumption that a type which is not i32 will be i16. This asserts
when trying to sign/zero extend an i64 to i32.

Test case was cut down from an openmp application. Variations on it are hit by
other combines before reaching the problematic one, e.g. replacing the
immediate values with other function arguments changes the codegen path and
misses this combine.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D98872

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/smed3.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eb5b06e5a46b..124f7449bc27 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9772,15 +9772,19 @@ SDValue SITargetLowering::performIntMed3ImmCombine(
   }
 
   // If there isn't a 16-bit med3 operation, convert to 32-bit.
-  MVT NVT = MVT::i32;
-  unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+  if (VT == MVT::i16) {
+    MVT NVT = MVT::i32;
+    unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 
-  SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
-  SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
-  SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
+    SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
+    SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
+    SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
 
-  SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
-  return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
+    SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
+    return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
+  }
+
+  return SDValue();
 }
 
 static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {

diff  --git a/llvm/test/CodeGen/AMDGPU/smed3.ll b/llvm/test/CodeGen/AMDGPU/smed3.ll
index 16aff2edba95..494510430ad5 100644
--- a/llvm/test/CodeGen/AMDGPU/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/smed3.ll
@@ -80,6 +80,19 @@ define amdgpu_kernel void @v_test_smed3_r_i_i_i64(i64 addrspace(1)* %out, i64 ad
   ret void
 }
 
+; Regression test for performIntMed3ImmCombine extending arguments to 32 bit
+; which failed for 64 bit arguments. Previously asserted / crashed.
+; GCN-LABEL: {{^}}test_intMed3ImmCombine_no_32bit_extend:
+; GCN: v_cmp_lt_i64
+; GCN: v_cmp_gt_i64
+define i64 @test_intMed3ImmCombine_no_32bit_extend(i64 %x) {
+  %smax = call i64 @llvm.smax.i64(i64 %x, i64 -2)
+  %smin = call i64 @llvm.smin.i64(i64 %smax, i64 2)
+  ret i64 %smin
+}
+declare i64 @llvm.smax.i64(i64, i64)
+declare i64 @llvm.smin.i64(i64, i64)
+
 ; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i16:
 ; SICIVI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
 ; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17