[llvm] 4efb4f6 - [VectorCombine] Add positive test for scalarizing multiple extracts.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri May 21 05:40:49 PDT 2021
Author: Florian Hahn
Date: 2021-05-21T13:39:37+01:00
New Revision: 4efb4f674cb6798949d943bd5755798a7f80e3d9
URL: https://github.com/llvm/llvm-project/commit/4efb4f674cb6798949d943bd5755798a7f80e3d9
DIFF: https://github.com/llvm/llvm-project/commit/4efb4f674cb6798949d943bd5755798a7f80e3d9.diff
LOG: [VectorCombine] Add positive test for scalarizing multiple extracts.
As suggested in D100273. Also adds an out-of-bounds access test.
Added:
Modified:
llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
index 5e105031ec787..8e747a40ec315 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -45,6 +45,19 @@ define i32 @load_extract_idx_3(<4 x i32>* %x) {
ret i32 %r
}
+; Out-of-bounds index for extractelement; this should not be converted to a
+; narrow load, because it would introduce a dereference of a poison pointer.
+define i32 @load_extract_idx_4(<4 x i32>* %x) {
+; CHECK-LABEL: @load_extract_idx_4(
+; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lv = load <4 x i32>, <4 x i32>* %x
+ %r = extractelement <4 x i32> %lv, i32 4
+ ret i32 %r
+}
+
define i32 @load_extract_idx_var_i64(<4 x i32>* %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64(
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16
@@ -369,6 +382,23 @@ define i32 @load_multiple_extracts_with_constant_idx(<4 x i32>* %x) {
ret i32 %res
}
+; Scalarizing the load for multiple extracts is profitable in this case,
+; because the large vector requires 2 vector registers.
+define i32 @load_multiple_extracts_with_constant_idx_profitable(<8 x i32>* %x) {
+; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
+; CHECK-NEXT: [[LV:%.*]] = load <8 x i32>, <8 x i32>* [[X:%.*]], align 32
+; CHECK-NEXT: [[E_0:%.*]] = extractelement <8 x i32> [[LV]], i32 0
+; CHECK-NEXT: [[E_1:%.*]] = extractelement <8 x i32> [[LV]], i32 6
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %lv = load <8 x i32>, <8 x i32>* %x
+ %e.0 = extractelement <8 x i32> %lv, i32 0
+ %e.1 = extractelement <8 x i32> %lv, i32 6
+ %res = add i32 %e.0, %e.1
+ ret i32 %res
+}
+
; Scalarizing may or may not be profitable, depending on the target.
define i32 @load_multiple_2_with_variable_indices(<4 x i32>* %x, i64 %idx.0, i64 %idx.1) {
; CHECK-LABEL: @load_multiple_2_with_variable_indices(
More information about the llvm-commits
mailing list