[llvm] [LV] Fold isPredicatedInst into isMaskRequired (PR #134261)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 8 02:10:54 PDT 2025
================
@@ -1040,65 +1040,161 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFNONE: [[END]]:
; TFNONE-NEXT: ret void
;
-; TFCOMMON-LABEL: define void @test_widen_exp_v2(
-; TFCOMMON-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
-; TFCOMMON-NEXT: [[ENTRY:.*]]:
-; TFCOMMON-NEXT: br label %[[LOOP:.*]]
-; TFCOMMON: [[LOOP]]:
-; TFCOMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8
-; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]]
-; TFCOMMON-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00
-; TFCOMMON-NEXT: [[SINK:%.*]] = select i1 [[COND1]], double 0.000000e+00, double 1.000000e+00
-; TFCOMMON-NEXT: store double [[SINK]], ptr [[P]], align 8
-; TFCOMMON-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; TFCOMMON-NEXT: [[COND2:%.*]] = icmp eq i64 [[IV]], [[N]]
-; TFCOMMON-NEXT: br i1 [[COND2]], label %[[END:.*]], label %[[LOOP]]
-; TFCOMMON: [[END]]:
-; TFCOMMON-NEXT: ret void
+; TFALWAYS-LABEL: define void @test_widen_exp_v2(
+; TFALWAYS-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; TFALWAYS-NEXT: [[ENTRY:.*]]:
+; TFALWAYS-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
+; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
+; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; TFALWAYS-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2
+; TFALWAYS-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
+; TFALWAYS-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
+; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
+; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]]
+; TFALWAYS: [[VECTOR_BODY]]:
+; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
+; TFALWAYS-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
+; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
+; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFALWAYS-NEXT: [[TMP5:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
+; TFALWAYS-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x double> [[TMP5]], zeroinitializer
+; TFALWAYS-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true)
+; TFALWAYS-NEXT: [[TMP8:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer
+; TFALWAYS-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
+; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
+; TFALWAYS-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; TFALWAYS: [[PRED_STORE_IF]]:
+; TFALWAYS-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
----------------
david-arm wrote:
This may be a performance regression. I'll have to check out the patch and test this locally. I'm not sure if we should really be using tail-folding here when we are forced to scalarise the stores.
https://github.com/llvm/llvm-project/pull/134261
More information about the llvm-commits
mailing list