[llvm] r345102 - SelectionDAG: Reuse bigger sized constants in memset expansion.

Tue Oct 23 16:19:23 PDT 2018

Author: matze
Date: Tue Oct 23 16:19:23 2018
New Revision: 345102

URL: http://llvm.org/viewvc/llvm-project?rev=345102&view=rev
Log:
SelectionDAG: Reuse bigger sized constants in memset expansion.

When implementing memset's today we often see this pattern:
$x0 = MOV 0xXYXYXYXYXYXYXYXY
store $x0, ...
$w1 = MOV 0xXYXYXYXY
store $w1, ...

We first create a 64bit constant in a 64bit register with all bytes the
same and then create a 32bit constant with all bytes the same in a 32bit
register. In many targets we could just access the lower byte of the
64bit register instead.

- Ideally this would be handled by the ConstantHoist pass but it runs
  too early when memset isn't expanded yet.
- The memset expansion code already had this optimization implemented,
  however SelectionDAG constantfolding would constantfold the
  "trunc(bigconstnat)" pattern to "smallconstant".
- This patch makes the memset expansion mark the constant as Opaque and
  stop DAGCombiner from constant folding in this situation. (Similar to
  how ConstantHoisting marks things as Opaque to avoid folding
  ADD/SUB/etc.)

Differential Revision: https://reviews.llvm.org/D53181

Removed:
    llvm/trunk/test/CodeGen/X86/pr38771.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=345102&r1=345101&r2=345102&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Tue Oct 23 16:19:23 2018
@@ -2058,6 +2058,14 @@ public:
     return true;
   }
 
+  /// Return true if the specified immediate is legal for the value input of a
+  /// store instruction.
+  virtual bool isLegalStoreImmediate(int64_t Value) const {
+    // Default implementation assumes that at least 0 works since it is likely
+    // that a zero register exists or a zero immediate is allowed.
+    return Value == 0;
+  }
+
   /// Return true if it's significantly cheaper to shift a vector by a uniform
   /// scalar than by an amount which will vary across each lane. On x86, for
   /// example, there is a "psllw" instruction for the former case, but no simple

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=345102&r1=345101&r2=345102&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Oct 23 16:19:23 2018
@@ -15029,7 +15029,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *
 
   // FIXME: is there such a thing as a truncating indexed store?
   if (ST->isTruncatingStore() && ST->isUnindexed() &&
-      Value.getValueType().isInteger()) {
+      Value.getValueType().isInteger() &&
+      (!isa<ConstantSDNode>(Value) ||
+       !cast<ConstantSDNode>(Value)->isOpaque())) {
     // See if we can simplify the input to this truncstore with knowledge that
     // only the low bits are being used.  For example:
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=345102&r1=345101&r2=345102&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Oct 23 16:19:23 2018
@@ -3889,9 +3889,12 @@ SDValue SelectionDAG::getNode(unsigned O
     case ISD::SIGN_EXTEND:
       return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                          C->isTargetOpcode(), C->isOpaque());
+    case ISD::TRUNCATE:
+      if (C->isOpaque())
+        break;
+      LLVM_FALLTHROUGH;
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
-    case ISD::TRUNCATE:
       return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
                          C->isTargetOpcode(), C->isOpaque());
     case ISD::UINT_TO_FP:
@@ -5158,8 +5161,11 @@ static SDValue getMemsetValue(SDValue Va
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
     assert(C->getAPIntValue().getBitWidth() == 8);
     APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
-    if (VT.isInteger())
-      return DAG.getConstant(Val, dl, VT);
+    if (VT.isInteger()) {
+      bool IsOpaque = VT.getSizeInBits() > 64 ||
+          !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+      return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+    }
     return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
                              VT);
   }

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=345102&r1=345101&r2=345102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Oct 23 16:19:23 2018
@@ -26890,6 +26890,10 @@ bool X86TargetLowering::isLegalAddImmedi
   return isInt<32>(Imm);
 }
 
+bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
+  return isInt<32>(Imm);
+}
+
 bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
   if (!VT1.isInteger() || !VT2.isInteger())
     return false;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=345102&r1=345101&r2=345102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Oct 23 16:19:23 2018
@@ -940,6 +940,8 @@ namespace llvm {
     /// the immediate into a register.
     bool isLegalAddImmediate(int64_t Imm) const override;
 
+    bool isLegalStoreImmediate(int64_t Imm) const override;
+
     /// Return the cost of the scaling factor used in the addressing
     /// mode represented by AM for this target, for a load/store
     /// of the specified type.

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll?rev=345102&r1=345101&r2=345102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll Tue Oct 23 16:19:23 2018
@@ -242,14 +242,12 @@ define void @memset_8_stack() {
   ret void
 }
 
-; FIXME This could be better: x9 is a superset of w8's bit-pattern.
 define void @memset_12_stack() {
 ; CHECK-LABEL: memset_12_stack:
-; CHECK:       mov w8, #-1431655766
-; CHECK-NEXT:  mov x9, #-6148914691236517206
+; CHECK:       mov x8, #-6148914691236517206
 ; CHECK-NEXT:  mov x0, sp
+; CHECK-NEXT:  str x8, [sp]
 ; CHECK-NEXT:  str w8, [sp, #8]
-; CHECK-NEXT:  str x9, [sp]
 ; CHECK-NEXT:  bl something
   %buf = alloca [12 x i8], align 1
   %cast = bitcast [12 x i8]* %buf to i8*
@@ -272,14 +270,12 @@ define void @memset_16_stack() {
   ret void
 }
 
-; FIXME This could be better: x9 is a superset of w8's bit-pattern.
 define void @memset_20_stack() {
 ; CHECK-LABEL: memset_20_stack:
-; CHECK:       mov w8, #-1431655766
-; CHECK-NEXT:  mov x9, #-6148914691236517206
+; CHECK:       mov x8, #-6148914691236517206
 ; CHECK-NEXT:  add x0, sp, #8
+; CHECK-NEXT:  stp x8, x8, [sp, #8]
 ; CHECK-NEXT:  str w8, [sp, #24]
-; CHECK-NEXT:  stp x9, x9, [sp, #8]
 ; CHECK-NEXT:  bl something
   %buf = alloca [20 x i8], align 1
   %cast = bitcast [20 x i8]* %buf to i8*
@@ -288,15 +284,13 @@ define void @memset_20_stack() {
   ret void
 }
 
-; FIXME This could be better: x9 is a superset of w8's bit-pattern.
 define void @memset_26_stack() {
 ; CHECK-LABEL: memset_26_stack:
-; CHECK:       mov w8, #43690
-; CHECK-NEXT:  mov x9, #-6148914691236517206
+; CHECK:       mov x8, #-6148914691236517206
 ; CHECK-NEXT:  mov x0, sp
+; CHECK-NEXT:  stp x8, x8, [sp, #8]
+; CHECK-NEXT:  str x8, [sp]
 ; CHECK-NEXT:  strh w8, [sp, #24]
-; CHECK-NEXT:  stp x9, x9, [sp, #8]
-; CHECK-NEXT:  str x9, [sp]
 ; CHECK-NEXT:  bl something
   %buf = alloca [26 x i8], align 1
   %cast = bitcast [26 x i8]* %buf to i8*

Removed: llvm/trunk/test/CodeGen/X86/pr38771.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr38771.ll?rev=345101&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr38771.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr38771.ll (removed)
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-define void @function() nounwind {
-; CHECK-LABEL: function:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movabsq $281474976710656, %rax # imm = 0x1000000000000
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    movl $2147483647, %ecx # imm = 0x7FFFFFFF
-; CHECK-NEXT:    shldq $65, %rax, %rcx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    movb $64, %dl
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    cmoveq %rcx, %rax
-; CHECK-NEXT:    movq %rax, (%rax)
-; CHECK-NEXT:    movl $0, (%rax)
-; CHECK-NEXT:    retq
-entry:
-  %B68 = sub i96 39614081257132168796771975167, 281474976710656
-  %B49 = or i96 39614081257132168796771975167, 39614081257132168796771975167
-  %B33 = lshr i96 %B68, %B68
-  store i96 %B33, i96* undef
-  ret void
-}