[llvm] [LV] Support generating masks for switch terminators. (PR #99808)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 8 10:24:15 PDT 2024
================
@@ -104,9 +181,62 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
; FORCED-LABEL: define void @switch_all_dests_distinct(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
+; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
+; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
+; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; FORCED: [[VECTOR_PH]]:
+; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
+; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
+; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
+; FORCED: [[VECTOR_BODY]]:
+; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
+; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
+; FORCED-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
+; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
+; FORCED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
+; FORCED-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
+; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
+; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
+; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
+; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
+; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
+; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
+; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
+; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]]
+; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]]
+; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
+; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]])
+; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]])
----------------
ayalz wrote:
(This calls for store sinking, producing a single unmasked store fed by selects.)
https://github.com/llvm/llvm-project/pull/99808
More information about the llvm-commits
mailing list