[llvm-branch-commits] [llvm] f5fcbe4 - [AArch64] Further restricts when a dup(*ext) can be rearranged

Mon Jan 18 08:06:05 PST 2021

Author: Nicholas Guy
Date: 2021-01-18T16:00:21Z
New Revision: f5fcbe4e3c68584ef4858590a079f17593feabbd

URL: https://github.com/llvm/llvm-project/commit/f5fcbe4e3c68584ef4858590a079f17593feabbd
DIFF: https://github.com/llvm/llvm-project/commit/f5fcbe4e3c68584ef4858590a079f17593feabbd.diff

LOG: [AArch64] Further restricts when a dup(*ext) can be rearranged

In most cases, the dup(*ext) pattern can be rearranged to perform
the extension on the vector side, allowing for further vector-specific
optimisations to be made. However, the initial checks for this conversion
were insufficient, allowing invalid encodings to be attempted (causing
compilation to fail).

Differential Revision: https://reviews.llvm.org/D94778

Added: 
    llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6e4ac0f711dd..39c40ef0b36d 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11843,7 +11843,8 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
 
   SDValue InsertVectorNode = DAG.getNode(
       InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
-      Extend.getOperand(0), DAG.getConstant(0, DL, MVT::i64));
+      DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
+      DAG.getConstant(0, DL, MVT::i64));
 
   std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
 
@@ -11851,9 +11852,8 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
       DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
                            DAG.getUNDEF(PreExtendVT), ShuffleMask);
 
-  SDValue ExtendNode =
-      DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, TargetType,
-                  VectorShuffleNode, DAG.getValueType(TargetType));
+  SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                   DL, TargetType, VectorShuffleNode);
 
   return ExtendNode;
 }

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
new file mode 100644
index 000000000000..51f91aa1b940
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -o -| FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test covers a case where an AArch64 DUP instruction is generated with an
+; invalid encoding, resulting in a crash. We don't care about the specific output
+; here, only that this case no longer causes said crash.
+define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
+; CHECK-LABEL: dupext_crashtest:
+for.body.lr.ph:
+  %conv314 = zext i32 %e to i64
+  br label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %for.body.lr.ph
+  br label %vector.ph
+
+vector.ph:                                        ; preds = %vector.memcheck
+  %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %conv314, i32 0
+  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %wide.load = load <2 x i32>, <2 x i32>* undef, align 4
+  %0 = zext <2 x i32> %wide.load to <2 x i64>
+  %1 = mul nuw <2 x i64> %broadcast.splat, %0
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  %3 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> %2
+  %4 = bitcast i32* undef to <2 x i32>*
+  store <2 x i32> %3, <2 x i32>* %4, align 4
+  br label %vector.body
+}