[llvm] f53234c - [AggressiveInstCombine] Fix invalid TypeSize conversion when combining loads.

Sat Dec 17 07:37:36 PST 2022

Author: Paul Walker
Date: 2022-12-17T15:34:27Z
New Revision: f53234cbfd0053f744f4825b10f56e108b2b8673

URL: https://github.com/llvm/llvm-project/commit/f53234cbfd0053f744f4825b10f56e108b2b8673
DIFF: https://github.com/llvm/llvm-project/commit/f53234cbfd0053f744f4825b10f56e108b2b8673.diff

LOG: [AggressiveInstCombine] Fix invalid TypeSize conversion when combining loads.

Much of foldLoadsRecursive relies on knowing the size of loaded
data, which is not possible for scalable vector types.  However,
the logic of combining two small loads into one bigger load does
not apply for vector types so rather than converting the algorithm
to use TypeSize I've simply added an early exit for vectors.

Fixes #59510

Differential Revision: https://reviews.llvm.org/D140106

Added: 
    llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll

Modified: 
    llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 5da7dbb8c6d56..473b41241b8a6 100644

--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -769,6 +769,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
 // of the loads is to form a wider load.
 static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                  TargetTransformInfo &TTI, AliasAnalysis &AA) {
+  // Only consider load chains of scalar values.
+  if (isa<VectorType>(I.getType()))
+    return false;
+
   LoadOps LOps;
   if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
     return false;

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll
new file mode 100644
index 0000000000000..f7d48ce1e099c
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
+
+; Use the same idoim as X86/or-load.ll @loadCombine_2consecutive to represent a
+; sequence of operations that can be replaced by a single double-width load when
+; using scalar types but whose logic does not apply to fixed length vectors.
+define <8 x i16> @or-load-fixed-length-vector(ptr %p1) {
+; CHECK-LABEL: @or-load-fixed-length-vector(
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P1:%.*]], i32 1
+; CHECK-NEXT:    [[L1:%.*]] = load <8 x i8>, ptr [[P1]], align 1
+; CHECK-NEXT:    [[L2:%.*]] = load <8 x i8>, ptr [[P2]], align 1
+; CHECK-NEXT:    [[E1:%.*]] = zext <8 x i8> [[L1]] to <8 x i16>
+; CHECK-NEXT:    [[E2:%.*]] = zext <8 x i8> [[L2]] to <8 x i16>
+; CHECK-NEXT:    [[S2:%.*]] = shl <8 x i16> [[E2]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[OR:%.*]] = or <8 x i16> [[E1]], [[S2]]
+; CHECK-NEXT:    ret <8 x i16> [[OR]]
+;
+  %p2 = getelementptr i8, ptr %p1, i32 1
+  %l1 = load <8 x i8>, ptr %p1, align 1
+  %l2 = load <8 x i8>, ptr %p2, align 1
+  %e1 = zext <8 x i8> %l1 to <8 x i16>
+  %e2 = zext <8 x i8> %l2 to <8 x i16>
+  %s2 = shl <8 x i16> %e2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %or = or <8 x i16> %e1, %s2
+  ret <8 x i16> %or
+}
+
+; Use the same idoim as X86/or-load.ll @loadCombine_2consecutive to represent a
+; sequence of operations that can be replaced by a single double-width load when
+; using scalar types but whose logic does not apply to scalable length vectors.
+define <vscale x 8 x i16> @or-load-scalable-vector(ptr %p1) {
+; CHECK-LABEL: @or-load-scalable-vector(
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P1:%.*]], i32 1
+; CHECK-NEXT:    [[L1:%.*]] = load <vscale x 8 x i8>, ptr [[P1]], align 1
+; CHECK-NEXT:    [[L2:%.*]] = load <vscale x 8 x i8>, ptr [[P2]], align 1
+; CHECK-NEXT:    [[E1:%.*]] = zext <vscale x 8 x i8> [[L1]] to <vscale x 8 x i16>
+; CHECK-NEXT:    [[E2:%.*]] = zext <vscale x 8 x i8> [[L2]] to <vscale x 8 x i16>
+; CHECK-NEXT:    [[S2:%.*]] = shl <vscale x 8 x i16> [[E2]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[OR:%.*]] = or <vscale x 8 x i16> [[E1]], [[S2]]
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[OR]]
+;
+  %p2 = getelementptr i8, ptr %p1, i32 1
+  %l1 = load <vscale x 8 x i8>, ptr %p1, align 1
+  %l2 = load <vscale x 8 x i8>, ptr %p2, align 1
+  %e1 = zext <vscale x 8 x i8> %l1 to <vscale x 8 x i16>
+  %e2 = zext <vscale x 8 x i8> %l2 to <vscale x 8 x i16>
+  %s2 = shl <vscale x 8 x i16> %e2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %or = or <vscale x 8 x i16> %e1, %s2
+  ret <vscale x 8 x i16> %or
+}