[PATCH] D77387: [ARM] Fix conditions for lowering to S[LR]I

Fri Apr 3 06:25:07 PDT 2020

PetreTudor created this revision.
Herald added subscribers: llvm-commits, hiraditya, kristof.beyls.
Herald added a project: LLVM.

Fixed wrong conditions for generating (S[LR]I X, Y, C2) from
(or (and X, BvecC1), (lsl Y, C2)). The optimisation is also
enabled by default now.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77387

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll


Index: llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
===================================================================

--- llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
 ; CHECK-LABEL: testLeftGood:
 ; CHECK: sli.16b v0, v1, #3
-  %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
+  %and.i = and <16 x i8> %src1, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   %vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   %result = or <16 x i8> %and.i, %vshl_n
   store <16 x i8> %result, <16 x i8>* %dest, align 16
@@ -23,7 +23,7 @@
 define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
 ; CHECK-LABEL: testRightGood:
 ; CHECK: sri.16b v0, v1, #3
-  %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
+  %and.i = and <16 x i8> %src1, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
   %vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   %result = or <16 x i8> %and.i, %vshl_n
   store <16 x i8> %result, <16 x i8>* %dest, align 16
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -99,11 +99,6 @@
 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
 STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
 
-static cl::opt<bool>
-EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
-                           cl::desc("Allow AArch64 SLI/SRI formation"),
-                           cl::init(false));
-
 // FIXME: The necessary dtprel relocations don't seem to be supported
 // well in the GNU bfd and gold linkers at the moment. Therefore, by
 // default, for now, fall back to GeneralDynamic code generation.
@@ -7893,8 +7888,9 @@
 
 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
-// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
-// Also, logical shift right -> sri, with the same structure.
+// BUILD_VECTORs with constant element C1, C2 is a constant, and:
+//   - for the SLI case: C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2)
+//   - for the SRI case: C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2)
 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
 
@@ -7927,15 +7923,21 @@
   if (!isAllConstantBuildVector(And.getOperand(1), C1))
     return SDValue();
 
-  // Is C1 == ~C2, taking into account how much one can shift elements of a
-  // particular size?
+  // Is C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) or
+  // C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2), taking into account
+  // how much one can shift elements of a particular size?
   uint64_t C2 = C2node->getZExtValue();
   unsigned ElemSizeInBits = VT.getScalarSizeInBits();
   if (C2 > ElemSizeInBits)
     return SDValue();
   unsigned ElemMask = (1 << ElemSizeInBits) - 1;
-  if ((C1 & ElemMask) != (~C2 & ElemMask))
-    return SDValue();
+  if (IsShiftRight) {
+    if ((C1 & ElemMask) != ((ElemMask << (ElemSizeInBits - C2)) & ElemMask))
+      return SDValue();
+  } else {
+    if ((C1 & ElemMask) != ((ElemMask >> (ElemSizeInBits - C2)) & ElemMask))
+      return SDValue();
+  }
 
   SDValue X = And.getOperand(0);
   SDValue Y = Shift.getOperand(0);
@@ -7959,10 +7961,8 @@
 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
                                              SelectionDAG &DAG) const {
   // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
-  if (EnableAArch64SlrGeneration) {
-    if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
-      return Res;
-  }
+  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
+    return Res;
 
   EVT VT = Op.getValueType();
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D77387.254770.patch
Type: text/x-patch
Size: 4720 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200403/e5366f10/attachment.bin>