[llvm] [VectorCombine] Enable transform 'scalarizeLoadExtract' for non constant indexes (PR #65445)
Ben Shi via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 6 21:45:58 PDT 2023
================
@@ -213,14 +265,54 @@ entry:
ret i32 %r
}
+define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+entry:
+ %cmp = icmp ult i64 %idx, 5
+ call void @llvm.assume(i1 %cmp)
+ %lv = load <vscale x 4 x i32>, ptr %x
+ %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
+ ret i32 %r
+}
+
+define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VS:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[VM:%.*]] = mul i64 [[VS]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], [[VM]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+entry:
+ %vs = call i64 @llvm.vscale.i64()
+ %vm = mul i64 %vs, 4
+ %cmp = icmp ult i64 %idx, %vm
+ call void @llvm.assume(i1 %cmp)
+ %lv = load <vscale x 4 x i32>, ptr %x
+ %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
+ ret i32 %r
+}
+
+declare i64 @llvm.vscale.i64()
declare void @llvm.assume(i1)
define i32 @load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3
-; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
+; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
----------------
benshi001 wrote:
This transform is safe and enabled by my change.
https://github.com/llvm/llvm-project/pull/65445
More information about the llvm-commits mailing list