[llvm] 7647822 - [AArch64][SVE] Remove i1 type from isElementTypeLegalForScalableVector

Fri Nov 12 06:25:21 PST 2021

Author: Kerry McLaughlin
Date: 2021-11-12T14:24:38Z
New Revision: 76478221565167da286bdc2d6a13eb241563e31c

URL: https://github.com/llvm/llvm-project/commit/76478221565167da286bdc2d6a13eb241563e31c
DIFF: https://github.com/llvm/llvm-project/commit/76478221565167da286bdc2d6a13eb241563e31c.diff

LOG: [AArch64][SVE] Remove i1 type from isElementTypeLegalForScalableVector

`collectElementTypesForWidening` collects the types of load, store and
reduction Phis in a loop. These types are later checked using
`isElementTypeLegalForScalableVector` to prevent vectorisation of
loops with instruction types that are unsupported.

This patch removes i1 from the list of types supported for scalable
vectors. This fixes an assert ("Cannot yet scalarize uniform stores") in
`setCostBasedWideningDecision` when we have a loop containing a uniform
i1 store and a scalable VF, which we cannot create a scatter for.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D113680

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c4e20bb12f8cf..d1e8cd204b3a7 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -226,7 +226,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
       return true;
 
-    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
+    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
       return true;
 
@@ -241,8 +241,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
     if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
       return false; // Fall back to scalarization of masked operations.
 
-    return !DataType->getScalarType()->isIntegerTy(1) &&
-           isElementTypeLegalForScalableVector(DataType->getScalarType());
+    return isElementTypeLegalForScalableVector(DataType->getScalarType());
   }
 
   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -263,8 +262,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
                          DataTypeFVTy->getNumElements() < 2))
       return false;
 
-    return !DataType->getScalarType()->isIntegerTy(1) &&
-           isElementTypeLegalForScalableVector(DataType->getScalarType());
+    return isElementTypeLegalForScalableVector(DataType->getScalarType());
   }
 
   bool isLegalMaskedGather(Type *DataType, Align Alignment) const {

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
index b13cd9cff9dbb..e529d2e2121ca 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
@@ -79,6 +79,35 @@ for.end:
   ret void
 }
 
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
+; CHECK-LABEL: @uniform_store_i1
+; CHECK: vector.body
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <2 x i64*> {{.*}}, i64 1
+; CHECK: %[[ICMP:.*]] = icmp eq <2 x i64*> %[[GEP]], %[[SPLAT:.*]]
+; CHECK: %[[EXTRACT1:.*]] = extractelement <2 x i1> %[[ICMP]], i32 0
+; CHECK: store i1 %[[EXTRACT1]], i1* %dst
+; CHECK: %[[EXTRACT2:.*]] = extractelement <2 x i1> %[[ICMP]], i32 1
+; CHECK: store i1 %[[EXTRACT2]], i1* %dst
+; CHECK-NOT: vscale
+entry:
+  br label %for.body
+
+for.body:
+  %first.sroa = phi i64* [ %incdec.ptr, %for.body ], [ %start, %entry ]
+  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i64 %iv, 1
+  %0 = load i64, i64* %first.sroa
+  %incdec.ptr = getelementptr inbounds i64, i64* %first.sroa, i64 1
+  %cmp.not = icmp eq i64* %incdec.ptr, %start
+  store i1 %cmp.not, i1* %dst
+  %cmp = icmp ult i64 %iv, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !0
+
+end:
+  ret void
+}
+
 define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_fixed_width_i128
 ; CHECK: load <4 x i128>, <4 x i128>*