[llvm] [LV] Fold isPredicatedInst into isMaskRequired (PR #134261)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 8 02:10:54 PDT 2025


================
@@ -1040,65 +1040,161 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFNONE:       [[END]]:
 ; TFNONE-NEXT:    ret void
 ;
-; TFCOMMON-LABEL: define void @test_widen_exp_v2(
-; TFCOMMON-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
-; TFCOMMON-NEXT:  [[ENTRY:.*]]:
-; TFCOMMON-NEXT:    br label %[[LOOP:.*]]
-; TFCOMMON:       [[LOOP]]:
-; TFCOMMON-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; TFCOMMON-NEXT:    [[LD:%.*]] = load double, ptr [[P2]], align 8
-; TFCOMMON-NEXT:    [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]]
-; TFCOMMON-NEXT:    [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00
-; TFCOMMON-NEXT:    [[SINK:%.*]] = select i1 [[COND1]], double 0.000000e+00, double 1.000000e+00
-; TFCOMMON-NEXT:    store double [[SINK]], ptr [[P]], align 8
-; TFCOMMON-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
-; TFCOMMON-NEXT:    [[COND2:%.*]] = icmp eq i64 [[IV]], [[N]]
-; TFCOMMON-NEXT:    br i1 [[COND2]], label %[[END:.*]], label %[[LOOP]]
-; TFCOMMON:       [[END]]:
-; TFCOMMON-NEXT:    ret void
+; TFALWAYS-LABEL: define void @test_widen_exp_v2(
+; TFALWAYS-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; TFALWAYS-NEXT:  [[ENTRY:.*]]:
+; TFALWAYS-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
+; TFALWAYS-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
+; TFALWAYS-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
+; TFALWAYS-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; TFALWAYS-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], 2
+; TFALWAYS-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
+; TFALWAYS-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
+; TFALWAYS-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
+; TFALWAYS-NEXT:    br label %[[VECTOR_BODY:.*]]
+; TFALWAYS:       [[VECTOR_BODY]]:
+; TFALWAYS-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; TFALWAYS-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
+; TFALWAYS-NEXT:    [[TMP4:%.*]] = load double, ptr [[P2]], align 8
+; TFALWAYS-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
+; TFALWAYS-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFALWAYS-NEXT:    [[TMP5:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
+; TFALWAYS-NEXT:    [[TMP6:%.*]] = fcmp ogt <2 x double> [[TMP5]], zeroinitializer
+; TFALWAYS-NEXT:    [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true)
+; TFALWAYS-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer
+; TFALWAYS-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
+; TFALWAYS-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
+; TFALWAYS-NEXT:    br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; TFALWAYS:       [[PRED_STORE_IF]]:
+; TFALWAYS-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
----------------
david-arm wrote:

This may be a performance regression. I'll have to check out the patch and test it locally. I'm not sure whether we should really be using tail-folding here when we're forced to scalarise the stores.
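For reference (the checks above are cut off just before the body of the first predicated block), the scalarised store under tail-folding expands into a per-lane extract/branch/store chain, roughly like the sketch below. The function and value names are illustrative only, not the exact FileCheck output from this patch:

```llvm
; Illustrative sketch: per-lane predicated store structure for VF=2,
; where each lane tests its mask bit, branches, and does a scalar store.
define void @scalarised_store_sketch(ptr %p, <2 x double> %vals, <2 x i1> %mask) {
entry:
  %m0 = extractelement <2 x i1> %mask, i32 0
  br i1 %m0, label %pred.store.if, label %pred.store.continue

pred.store.if:                                    ; lane 0
  %v0 = extractelement <2 x double> %vals, i32 0
  store double %v0, ptr %p, align 8
  br label %pred.store.continue

pred.store.continue:
  %m1 = extractelement <2 x i1> %mask, i32 1
  br i1 %m1, label %pred.store.if1, label %pred.store.continue2

pred.store.if1:                                   ; lane 1
  %v1 = extractelement <2 x double> %vals, i32 1
  store double %v1, ptr %p, align 8
  br label %pred.store.continue2

pred.store.continue2:
  ret void
}
```

It's that per-lane control flow, rather than a single masked store, that makes me want to measure this before signing off.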

https://github.com/llvm/llvm-project/pull/134261

