[llvm] d4c070d - [VectorCombine] Freeze index unless it is known to be non-poison.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 1 02:41:30 PDT 2021


Author: Florian Hahn
Date: 2021-06-01T10:40:57+01:00
New Revision: d4c070d801413186c5a59cede9d721e9ca099708

URL: https://github.com/llvm/llvm-project/commit/d4c070d801413186c5a59cede9d721e9ca099708
DIFF: https://github.com/llvm/llvm-project/commit/d4c070d801413186c5a59cede9d721e9ca099708.diff

LOG: [VectorCombine] Freeze index unless it is known to be non-poison.

If the index itself is already poison, the poison propagates through
instructions clamping the index to a valid range. This still causes
introducing a load of poison, as flagged by Alive2 and pointed out
at 575e2aff5574.

This patch updates the code to freeze the index, unless it is proven to
not be poison.

Reviewed By: nlopes

Differential Revision: https://reviews.llvm.org/D103378

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 9f3ca224f846..66899459bf17 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -911,8 +911,13 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
   for (User *U : LI->users()) {
     auto *EI = cast<ExtractElementInst>(U);
     Builder.SetInsertPoint(EI);
-    Value *GEP = Builder.CreateInBoundsGEP(
-        FixedVT, Ptr, {Builder.getInt32(0), EI->getOperand(1)});
+
+    Value *Idx = EI->getOperand(1);
+    if (!isGuaranteedNotToBePoison(Idx, &AC, LI, &DT))
+      Idx = Builder.CreateFreeze(Idx);
+
+    Value *GEP =
+        Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
     auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
         FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
 

diff  --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
index c80362a9e611..e603b2070ecd 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -147,8 +147,9 @@ define i32 @load_extract_idx_var_i64_known_valid_by_and(<4 x i32>* %x, i64 %idx)
 ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
-; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i64 [[IDX_CLAMPED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -192,8 +193,9 @@ define i32 @load_extract_idx_var_i64_known_valid_by_urem(<4 x i32>* %x, i64 %idx
 ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
-; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i64 [[IDX_CLAMPED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -569,10 +571,12 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_
 ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(
 ; CHECK-NEXT:    [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15
 ; CHECK-NEXT:    [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]]
-; CHECK-NEXT:    [[E_0:%.*]] = load i32, i32* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
-; CHECK-NEXT:    [[E_1:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i64 [[IDX_0_CLAMPED]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[E_0:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze i64 [[IDX_1_CLAMPED]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[TMP3]]
+; CHECK-NEXT:    [[E_1:%.*]] = load i32, i32* [[TMP4]], align 4
 ; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
@@ -590,10 +594,11 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_
 ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(
 ; CHECK-NEXT:    [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15
 ; CHECK-NEXT:    [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]]
-; CHECK-NEXT:    [[E_0:%.*]] = load i32, i32* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
-; CHECK-NEXT:    [[E_1:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i64 [[IDX_0_CLAMPED]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[E_0:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
+; CHECK-NEXT:    [[E_1:%.*]] = load i32, i32* [[TMP3]], align 4
 ; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;


        


More information about the llvm-commits mailing list