[llvm] [LV] Check for vector-to-scalar casts in legalizer (PR #106244)

Fri Sep 6 01:15:58 PDT 2024

https://github.com/ErikHogeman updated https://github.com/llvm/llvm-project/pull/106244

>From e4cd83669f809a88937a1eeec99a13e3c704960c Mon Sep 17 00:00:00 2001
From: Erik Hogeman <erik.hogeman at arm.com>
Date: Tue, 27 Aug 2024 18:20:23 +0200
Subject: [PATCH] [LoopVectorize] Check for vector-to-scalar casts in legalizer

The code makes assumptions later on the operations and
their inputs being scalar in the loops that are processed,
so we should make sure this is the case in the legalizer.
---
 .../Vectorize/LoopVectorizationLegality.cpp   |  3 ++
 .../LoopVectorize/vector-to-scalar-cast.ll    | 40 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/vector-to-scalar-cast.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7042af6dd8eae5..7062e21383a5fc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -943,9 +943,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         VecCallVariantsFound = true;
 
       // Check that the instruction return type is vectorizable.
+      // We can't vectorize casts from vector type to scalar type.
       // Also, we can't vectorize extractelement instructions.
       if ((!VectorType::isValidElementType(I.getType()) &&
            !I.getType()->isVoidTy()) ||
+          (isa<CastInst>(I) &&
+           !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
           isa<ExtractElementInst>(I)) {
         reportVectorizationFailure("Found unvectorizable type",
             "instruction return type cannot be vectorized",
diff --git a/llvm/test/Transforms/LoopVectorize/vector-to-scalar-cast.ll b/llvm/test/Transforms/LoopVectorize/vector-to-scalar-cast.ll
new file mode 100644
index 00000000000000..a6dcd4d1cfc114
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vector-to-scalar-cast.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; The test was crashing earlier due to a vectorization attempt; vector-to-scalar cast
+; is now forbidden in the legalizer, and the test isn't a vectorization candidate now.
+; RUN: opt -S -force-widen-divrem-via-safe-divisor=false -force-vector-width=4 --passes=loop-vectorize < %s | FileCheck %s
+
+define void @vector_to_scalar_cast(ptr %out) {
+; CHECK-LABEL: define void @vector_to_scalar_cast(
+; CHECK-SAME: ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[VEC0:%.*]] = insertelement <2 x i16> undef, i16 0, i64 0
+; CHECK-NEXT:    [[VEC1:%.*]] = insertelement <2 x i16> [[VEC0]], i16 0, i64 1
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV]], 11
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[VEC_SCALAR_CAST:%.*]] = bitcast <2 x i16> [[VEC1]] to i32
+; CHECK-NEXT:    [[SREM:%.*]] = srem i32 0, [[VEC_SCALAR_CAST]]
+; CHECK-NEXT:    store i32 [[SREM]], ptr [[OUT]], align 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %vec0 = insertelement <2 x i16> undef, i16 0, i64 0
+  %vec1 = insertelement <2 x i16> %vec0, i16 0, i64 1
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %iv.next, %loop ], [ 1, %entry ]
+  %exitcond = icmp ne i32 %iv, 11
+  %iv.next = add nuw nsw i32 %iv, 1
+  %vec.scalar.cast = bitcast <2 x i16> %vec1 to i32
+  %srem = srem i32 0, %vec.scalar.cast
+  store i32 %srem, ptr %out
+  br i1 %exitcond, label %loop, label %exit
+
+exit:
+  ret void
+}