[llvm] 3b82397 - [VectorCombine] Check for non-byte-sized element type

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 28 05:18:40 PDT 2023


Author: Nikita Popov
Date: 2023-09-28T14:18:30+02:00
New Revision: 3b8239796522f8e51483760c4a531bc64fa8ad69

URL: https://github.com/llvm/llvm-project/commit/3b8239796522f8e51483760c4a531bc64fa8ad69
DIFF: https://github.com/llvm/llvm-project/commit/3b8239796522f8e51483760c4a531bc64fa8ad69.diff

LOG: [VectorCombine] Check for non-byte-sized element type

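The scalarization folds should check whether the element type is
byte-sized (i.e. its type size equals its store size), not whether
the vector type is. For types like <32 x i1> the whole type is
byte-sized, but the individual elements (that we scalarize to)
are not, so rewriting the vector access as a per-element load or
store is a miscompile.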

Fixes https://github.com/llvm/llvm-project/issues/67060.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
    llvm/test/Transforms/VectorCombine/load-insert-store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index da192bf2fd560d0..9b9aaa22a71c6be 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1206,7 +1206,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
     // Don't optimize for atomic/volatile load or store. Ensure memory is not
     // modified between, vector type matches store size, and index is inbounds.
     if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
-        !DL.typeSizeEqualsStoreSize(Load->getType()) ||
+        !DL.typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
         SrcAddr != SI->getPointerOperand()->stripPointerCasts())
       return false;
 
@@ -1244,7 +1244,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
   auto *VecTy = cast<VectorType>(I.getType());
   auto *LI = cast<LoadInst>(&I);
   const DataLayout &DL = I.getModule()->getDataLayout();
-  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy))
+  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy->getScalarType()))
     return false;
 
   InstructionCost OriginalCost =

diff  --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
index c68c5c6e7593d9e..7d1c7a6855d7619 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -656,11 +656,10 @@ define i31 @load_with_non_power_of_2_element_type(ptr %x) {
   ret i31 %r
 }
 
-; FIXME: This is a miscompile.
 define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
 ; CHECK-LABEL: @load_with_non_power_of_2_element_type_2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i1>, ptr [[X:%.*]], i32 0, i32 1
-; CHECK-NEXT:    [[R:%.*]] = load i1, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[LV:%.*]] = load <8 x i1>, ptr [[X:%.*]], align 1
+; CHECK-NEXT:    [[R:%.*]] = extractelement <8 x i1> [[LV]], i32 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %lv = load <8 x i1>, ptr %x

diff  --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index c10e97ce8ef3c10..acf8b07a52cb07d 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -107,11 +107,11 @@ entry:
   ret void
 }
 
-; FIXME: This is a miscompile.
 define void @insert_store_v32i1(ptr %p) {
 ; CHECK-LABEL: @insert_store_v32i1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <32 x i1>, ptr [[P:%.*]], i64 0, i64 0
-; CHECK-NEXT:    store i1 true, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[VEC:%.*]] = load <32 x i1>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <32 x i1> [[VEC]], i1 true, i64 0
+; CHECK-NEXT:    store <32 x i1> [[INS]], ptr [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %vec = load <32 x i1>, ptr %p


        


More information about the llvm-commits mailing list