[llvm] 4efb4f6 - [VectorCombine] Add positive test for scalarizing multiple extracts.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri May 21 05:40:49 PDT 2021


Author: Florian Hahn
Date: 2021-05-21T13:39:37+01:00
New Revision: 4efb4f674cb6798949d943bd5755798a7f80e3d9

URL: https://github.com/llvm/llvm-project/commit/4efb4f674cb6798949d943bd5755798a7f80e3d9
DIFF: https://github.com/llvm/llvm-project/commit/4efb4f674cb6798949d943bd5755798a7f80e3d9.diff

LOG: [VectorCombine] Add positive test for scalarizing multiple extracts.

As suggested in D100273. Also adds an out-of-bounds access test.

Added: 
    

Modified: 
    llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
index 5e105031ec787..8e747a40ec315 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -45,6 +45,19 @@ define i32 @load_extract_idx_3(<4 x i32>* %x) {
   ret i32 %r
 }
 
+; Out-of-bounds index for extractelement; this should not be converted to a
+; narrow load, because it would introduce a dereference of a poison pointer.
+define i32 @load_extract_idx_4(<4 x i32>* %x) {
+; CHECK-LABEL: @load_extract_idx_4(
+; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <4 x i32>, <4 x i32>* %x
+  %r = extractelement <4 x i32> %lv, i32 4
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i64(<4 x i32>* %x, i64 %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i64(
 ; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16
@@ -369,6 +382,23 @@ define i32 @load_multiple_extracts_with_constant_idx(<4 x i32>* %x) {
   ret i32 %res
 }
 
+; Scalarizing the load for multiple extracts is profitable in this case,
+; because the large vector requires 2 vector registers.
+define i32 @load_multiple_extracts_with_constant_idx_profitable(<8 x i32>* %x) {
+; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
+; CHECK-NEXT:    [[LV:%.*]] = load <8 x i32>, <8 x i32>* [[X:%.*]], align 32
+; CHECK-NEXT:    [[E_0:%.*]] = extractelement <8 x i32> [[LV]], i32 0
+; CHECK-NEXT:    [[E_1:%.*]] = extractelement <8 x i32> [[LV]], i32 6
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %lv = load <8 x i32>, <8 x i32>* %x
+  %e.0 = extractelement <8 x i32> %lv, i32 0
+  %e.1 = extractelement <8 x i32> %lv, i32 6
+  %res = add i32 %e.0, %e.1
+  ret i32 %res
+}
+
 ; Scalarizing may or may not be profitable, depending on the target.
 define i32 @load_multiple_2_with_variable_indices(<4 x i32>* %x, i64 %idx.0, i64 %idx.1) {
 ; CHECK-LABEL: @load_multiple_2_with_variable_indices(


        


More information about the llvm-commits mailing list