[llvm] [LoopVectorize] Add cost of generating tail-folding mask to the loop (PR #130565)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 05:03:42 PDT 2025


================
@@ -1018,164 +1018,63 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFNONE:       end:
 ; TFNONE-NEXT:    ret void
 ;
-; TFALWAYS-LABEL: @test_widen_exp_v2(
-; TFALWAYS-NEXT:  entry:
-; TFALWAYS-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; TFALWAYS-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
-; TFALWAYS-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
-; TFALWAYS-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; TFALWAYS-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], 2
-; TFALWAYS-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
-; TFALWAYS-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
-; TFALWAYS-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
-; TFALWAYS-NEXT:    br label [[VECTOR_BODY:%.*]]
-; TFALWAYS:       vector.body:
-; TFALWAYS-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
-; TFALWAYS-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
-; TFALWAYS-NEXT:    [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFALWAYS-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
-; TFALWAYS-NEXT:    [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
-; TFALWAYS-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFALWAYS-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
-; TFALWAYS-NEXT:    [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
-; TFALWAYS-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
-; TFALWAYS-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
-; TFALWAYS-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
-; TFALWAYS-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
-; TFALWAYS-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; TFALWAYS:       pred.store.if:
-; TFALWAYS-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
-; TFALWAYS-NEXT:    store double [[TMP13]], ptr [[P:%.*]], align 8
-; TFALWAYS-NEXT:    br label [[PRED_STORE_CONTINUE]]
-; TFALWAYS:       pred.store.continue:
-; TFALWAYS-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
-; TFALWAYS-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
-; TFALWAYS:       pred.store.if1:
-; TFALWAYS-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
-; TFALWAYS-NEXT:    store double [[TMP15]], ptr [[P]], align 8
-; TFALWAYS-NEXT:    br label [[PRED_STORE_CONTINUE2]]
-; TFALWAYS:       pred.store.continue2:
-; TFALWAYS-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; TFALWAYS-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
-; TFALWAYS-NEXT:    [[TMP16:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; TFALWAYS-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
-; TFALWAYS-NEXT:    br i1 [[TMP17]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
-; TFALWAYS:       end:
-; TFALWAYS-NEXT:    ret void
-;
-; TFFALLBACK-LABEL: @test_widen_exp_v2(
-; TFFALLBACK-NEXT:  entry:
-; TFFALLBACK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; TFFALLBACK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
-; TFFALLBACK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
-; TFFALLBACK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; TFFALLBACK-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], 2
-; TFFALLBACK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
-; TFFALLBACK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
-; TFFALLBACK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
-; TFFALLBACK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; TFFALLBACK:       vector.body:
-; TFFALLBACK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
-; TFFALLBACK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
-; TFFALLBACK-NEXT:    [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFFALLBACK-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
----------------
fhahn wrote:

Do we lose test coverage for widening/scalarizing exp here or is this also covered somewhere else?

https://github.com/llvm/llvm-project/pull/130565


More information about the llvm-commits mailing list