[llvm] 39522b1 - [SelectionDAG] Legalize intrinsic get.active.lane.mask

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 25 07:01:46 PDT 2020


Author: Sjoerd Meijer
Date: 2020-08-25T15:00:10+01:00
New Revision: 39522b1e10428e4fa79a9d2dda20cbea7a1168e0

URL: https://github.com/llvm/llvm-project/commit/39522b1e10428e4fa79a9d2dda20cbea7a1168e0
DIFF: https://github.com/llvm/llvm-project/commit/39522b1e10428e4fa79a9d2dda20cbea7a1168e0.diff

LOG: [SelectionDAG] Legalize intrinsic get.active.lane.mask

This adapts the legalization of the get.active.lane.mask intrinsic to the new
semantics described in D86147. Because the second argument is now the loop trip
count rather than the backedge-taken count, the intrinsic is legalized to an
'icmp ULT' instead of the 'icmp ULE' that was used previously.

Differential Revision: https://reviews.llvm.org/D86302

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
    llvm/test/CodeGen/Thumb2/active_lane_mask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9e57fa084ad8..38d11e4cd059 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6890,16 +6890,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
   case Intrinsic::get_active_lane_mask: {
     auto DL = getCurSDLoc();
     SDValue Index = getValue(I.getOperand(0));
-    SDValue BTC = getValue(I.getOperand(1));
+    SDValue TripCount = getValue(I.getOperand(1));
     Type *ElementTy = I.getOperand(0)->getType();
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     unsigned VecWidth = VT.getVectorNumElements();
 
-    SmallVector<SDValue, 16> OpsBTC;
+    SmallVector<SDValue, 16> OpsTripCount;
     SmallVector<SDValue, 16> OpsIndex;
     SmallVector<SDValue, 16> OpsStepConstants;
     for (unsigned i = 0; i < VecWidth; i++) {
-      OpsBTC.push_back(BTC);
+      OpsTripCount.push_back(TripCount);
       OpsIndex.push_back(Index);
       OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
     }
@@ -6912,9 +6912,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
     SDValue VectorInduction = DAG.getNode(
        ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
-    SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC);
+    SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
     SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
-                                 VectorBTC, ISD::CondCode::SETULE);
+                                 VectorTripCount, ISD::CondCode::SETULT);
     setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
                              DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
                              SetCC));

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
index d29c39a82124..b6076eb28ff4 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
@@ -253,7 +253,7 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
 ; CHECK-NEXT:    add.w r12, r12, #4
 ; CHECK-NEXT:    vpnot
 ; CHECK-NEXT:    vpstt
-; CHECK-NEXT:    vcmpt.u32 cs, q1, q2
+; CHECK-NEXT:    vcmpt.u32 hi, q1, q2
 ; CHECK-NEXT:    vldrwt.u32 q2, [r0], #16
 ; CHECK-NEXT:    vrintr.f32 s15, s11
 ; CHECK-NEXT:    vrintr.f32 s14, s10

diff  --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index e9dfccd320da..7696e6645195 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -13,7 +13,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) {
 ; CHECK-NEXT:    vdup.32 q1, r1
 ; CHECK-NEXT:    vpnot
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vcmpt.u32 cs, q1, q0
+; CHECK-NEXT:    vcmpt.u32 hi, q1, q0
 ; CHECK-NEXT:    vmov d0, r2, r3
 ; CHECK-NEXT:    vldr d1, [sp]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
@@ -43,7 +43,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
 ; CHECK-NEXT:    vmov.i8 q1, #0x0
 ; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vadd.i32 q3, q0, r0
-; CHECK-NEXT:    vcmp.u32 cs, q5, q3
+; CHECK-NEXT:    vcmp.u32 hi, q5, q3
 ; CHECK-NEXT:    vpsel q4, q2, q1
 ; CHECK-NEXT:    vmov r1, s16
 ; CHECK-NEXT:    vmov.16 q0[0], r1
@@ -56,7 +56,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
 ; CHECK-NEXT:    adr r1, .LCPI1_1
 ; CHECK-NEXT:    vldrw.u32 q4, [r1]
 ; CHECK-NEXT:    vadd.i32 q4, q4, r0
-; CHECK-NEXT:    vcmp.u32 cs, q5, q4
+; CHECK-NEXT:    vcmp.u32 hi, q5, q4
 ; CHECK-NEXT:    vpsel q5, q2, q1
 ; CHECK-NEXT:    vmov r1, s20
 ; CHECK-NEXT:    vmov.16 q0[4], r1
@@ -128,7 +128,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    vmov.i8 q5, #0x0
 ; CHECK-NEXT:    vmov.i8 q4, #0xff
 ; CHECK-NEXT:    vadd.i32 q1, q0, r0
-; CHECK-NEXT:    vcmp.u32 cs, q7, q1
+; CHECK-NEXT:    vcmp.u32 hi, q7, q1
 ; CHECK-NEXT:    vpsel q0, q4, q5
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    vmov.16 q2[0], r1
@@ -141,7 +141,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    adr r1, .LCPI2_1
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 ; CHECK-NEXT:    vadd.i32 q3, q0, r0
-; CHECK-NEXT:    vcmp.u32 cs, q7, q3
+; CHECK-NEXT:    vcmp.u32 hi, q7, q3
 ; CHECK-NEXT:    vpsel q0, q4, q5
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    vmov.16 q2[4], r1
@@ -172,7 +172,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    adr r1, .LCPI2_2
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 ; CHECK-NEXT:    vadd.i32 q0, q0, r0
-; CHECK-NEXT:    vcmp.u32 cs, q7, q0
+; CHECK-NEXT:    vcmp.u32 hi, q7, q0
 ; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
 ; CHECK-NEXT:    vpsel q6, q4, q5
 ; CHECK-NEXT:    vmov r1, s24
@@ -186,7 +186,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    adr r1, .LCPI2_3
 ; CHECK-NEXT:    vldrw.u32 q6, [r1]
 ; CHECK-NEXT:    vadd.i32 q6, q6, r0
-; CHECK-NEXT:    vcmp.u32 cs, q7, q6
+; CHECK-NEXT:    vcmp.u32 hi, q7, q6
 ; CHECK-NEXT:    vpsel q7, q4, q5
 ; CHECK-NEXT:    vmov r1, s28
 ; CHECK-NEXT:    vmov.16 q0[4], r1


        


More information about the llvm-commits mailing list