[llvm] [LV] Support generating masks for switch terminators. (PR #99808)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 11 07:24:41 PDT 2024


================
@@ -588,9 +954,94 @@ define void @large_number_of_cases(ptr %start, ptr %end) {
 ; FORCED-LABEL: define void @large_number_of_cases(
 ; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
 ; FORCED-NEXT:  [[ENTRY:.*]]:
+; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
+; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
+; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; FORCED:       [[VECTOR_PH]]:
+; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
+; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
+; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
+; FORCED:       [[VECTOR_BODY]]:
+; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
+; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
+; FORCED-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
+; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
+; FORCED-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
+; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
+; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
+; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 1, i64 1, i64 1, i64 1>
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 1, i64 1, i64 1, i64 1>
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 3, i64 3, i64 3, i64 3>
+; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 11, i64 11, i64 11, i64 11>
+; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 11, i64 11, i64 11, i64 11>
+; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 99, i64 99, i64 99, i64 99>
+; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 99, i64 99, i64 99, i64 99>
+; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 213, i64 213, i64 213, i64 213>
+; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 213, i64 213, i64 213, i64 213>
+; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 238, i64 238, i64 238, i64 238>
+; FORCED-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 238, i64 238, i64 238, i64 238>
+; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 513, i64 513, i64 513, i64 513>
+; FORCED-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 513, i64 513, i64 513, i64 513>
+; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 791, i64 791, i64 791, i64 791>
+; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 791, i64 791, i64 791, i64 791>
+; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 899, i64 899, i64 899, i64 899>
+; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 899, i64 899, i64 899, i64 899>
+; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
+; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
+; FORCED-NEXT:    [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]]
+; FORCED-NEXT:    [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP14]]
+; FORCED-NEXT:    [[TMP31:%.*]] = or <4 x i1> [[TMP29]], [[TMP15]]
+; FORCED-NEXT:    [[TMP32:%.*]] = or <4 x i1> [[TMP30]], [[TMP16]]
+; FORCED-NEXT:    [[TMP33:%.*]] = or <4 x i1> [[TMP31]], [[TMP17]]
+; FORCED-NEXT:    [[TMP34:%.*]] = or <4 x i1> [[TMP32]], [[TMP18]]
+; FORCED-NEXT:    [[TMP35:%.*]] = or <4 x i1> [[TMP33]], [[TMP19]]
+; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP34]], [[TMP20]]
+; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP35]], [[TMP21]]
+; FORCED-NEXT:    [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP22]]
+; FORCED-NEXT:    [[TMP39:%.*]] = or <4 x i1> [[TMP37]], [[TMP23]]
+; FORCED-NEXT:    [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
+; FORCED-NEXT:    [[TMP41:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
+; FORCED-NEXT:    [[TMP42:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
+; FORCED-NEXT:    [[TMP43:%.*]] = or <4 x i1> [[TMP41]], [[TMP41]]
+; FORCED-NEXT:    [[TMP44:%.*]] = or <4 x i1> [[TMP42]], [[TMP42]]
+; FORCED-NEXT:    [[TMP45:%.*]] = or <4 x i1> [[TMP43]], [[TMP41]]
+; FORCED-NEXT:    [[TMP46:%.*]] = or <4 x i1> [[TMP44]], [[TMP42]]
+; FORCED-NEXT:    [[TMP47:%.*]] = or <4 x i1> [[TMP45]], [[TMP41]]
+; FORCED-NEXT:    [[TMP48:%.*]] = or <4 x i1> [[TMP46]], [[TMP42]]
+; FORCED-NEXT:    [[TMP49:%.*]] = or <4 x i1> [[TMP47]], [[TMP41]]
+; FORCED-NEXT:    [[TMP50:%.*]] = or <4 x i1> [[TMP48]], [[TMP42]]
+; FORCED-NEXT:    [[TMP51:%.*]] = or <4 x i1> [[TMP49]], [[TMP41]]
+; FORCED-NEXT:    [[TMP52:%.*]] = or <4 x i1> [[TMP50]], [[TMP42]]
+; FORCED-NEXT:    [[TMP53:%.*]] = or <4 x i1> [[TMP51]], [[TMP41]]
+; FORCED-NEXT:    [[TMP54:%.*]] = or <4 x i1> [[TMP52]], [[TMP42]]
+; FORCED-NEXT:    [[TMP55:%.*]] = or <4 x i1> [[TMP53]], [[TMP41]]
+; FORCED-NEXT:    [[TMP56:%.*]] = or <4 x i1> [[TMP54]], [[TMP42]]
+; FORCED-NEXT:    [[TMP57:%.*]] = or <4 x i1> [[TMP55]], [[TMP41]]
+; FORCED-NEXT:    [[TMP58:%.*]] = or <4 x i1> [[TMP56]], [[TMP42]]
----------------
fhahn wrote:

Will be cleaned up in a follow-up, thanks!

https://github.com/llvm/llvm-project/pull/99808


More information about the llvm-commits mailing list