[llvm] a57236d - [PowerPC] Remove extend between shift and and

Wed Jul 5 13:33:25 PDT 2023

Author: Nemanja Ivanovic
Date: 2023-07-05T16:33:07-04:00
New Revision: a57236de4eb8f38b4201647b10146941cbbb5c0b

URL: https://github.com/llvm/llvm-project/commit/a57236de4eb8f38b4201647b10146941cbbb5c0b
DIFF: https://github.com/llvm/llvm-project/commit/a57236de4eb8f38b4201647b10146941cbbb5c0b.diff

LOG: [PowerPC] Remove extend between shift and and

The SDAG will sometimes insert an extend between
a shift and an and (immediate) even though the
immediate fits in the narrow (pre-extend) type.
This prevents us from producing a rotate
instruction (such as rlwinm).
This patch adds a combine that performs the and
in the narrow type and extends its result instead.

Differential revision: https://reviews.llvm.org/D152911

Added: 
    llvm/test/CodeGen/PowerPC/and-extend-combine.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 6cf0ccf8086a9b..f558639a0ec0df 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1381,8 +1381,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
   // We have target-specific dag combine patterns for the following nodes:
-  setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
-                       ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
+                       ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
   if (Subtarget.hasFPCVT())
     setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -15496,6 +15496,31 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::ADD:
     return combineADD(N, DCI);
+  case ISD::AND: {
+    // We don't want (and (zext/anyext (shift...)), C) if C fits in the width of
+    // the original input as that prevents us from selecting optimal rotates.
+    SDValue Op1 = N->getOperand(0);
+    SDValue Op2 = N->getOperand(1);
+    if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
+         Op1.getOpcode() != ISD::ANY_EXTEND) ||
+        !isa<ConstantSDNode>(Op2))
+      break;
+    SDValue NarrowOp = Op1.getOperand(0);
+    unsigned NarrowOpcode = NarrowOp.getOpcode();
+    if (NarrowOpcode != ISD::SHL && NarrowOpcode != ISD::SRL &&
+        NarrowOpcode != ISD::ROTL && NarrowOpcode != ISD::ROTR &&
+        NarrowOpcode != ISD::FSHL && NarrowOpcode != ISD::FSHR)
+      break;
+    uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+    EVT NarrowVT = NarrowOp.getValueType();
+    // Make sure that the constant is narrow enough to fit in the narrow type.
+    if (Imm >= maxUIntN(NarrowVT.getSizeInBits()))
+      break;
+    SDValue ConstOp = DAG.getConstant(Imm, dl, NarrowVT);
+    SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, NarrowVT, NarrowOp, ConstOp);
+    // The wide AND zeroed the high bits, so the result must be zero-extended.
+    return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
+  }
   case ISD::SHL:
     return combineSHL(N, DCI);
   case ISD::SRA:

diff  --git a/llvm/test/CodeGen/PowerPC/and-extend-combine.ll b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
new file mode 100644
index 00000000000000..b05d0097154a55
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
+; RUN:   -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
+define dso_local ptr @foo(i32 noundef zeroext %arg, ptr nocapture noundef readonly %arg1, ptr noundef writeonly %arg2) local_unnamed_addr {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    rlwinm r3, r3, 31, 17, 28
+; CHECK-NEXT:    ldx r4, r4, r3
+; CHECK-NEXT:    clrldi r3, r4, 56
+; CHECK-NEXT:    add r3, r5, r3
+; CHECK-NEXT:    std r4, 0(r5)
+; CHECK-NEXT:    blr
+bb:
+  %i = lshr i32 %arg, 1
+  %i3 = and i32 %i, 32760
+  %i4 = zext i32 %i3 to i64
+  %i5 = getelementptr inbounds i8, ptr %arg1, i64 %i4
+  %i6 = load i64, ptr %i5, align 8
+  %i7 = and i64 %i6, 255
+  store i64 %i6, ptr %arg2, align 8
+  %i8 = getelementptr inbounds i8, ptr %arg2, i64 %i7
+  ret ptr %i8
+}