[llvm] b0e249d - Reland "[PowerPC] Remove extend between shift and and"

Fri Jul 7 11:45:16 PDT 2023

Author: Nemanja Ivanovic
Date: 2023-07-07T14:45:05-04:00
New Revision: b0e249d5e289dc3f7e4a7cff785453206925f8b9

URL: https://github.com/llvm/llvm-project/commit/b0e249d5e289dc3f7e4a7cff785453206925f8b9
DIFF: https://github.com/llvm/llvm-project/commit/b0e249d5e289dc3f7e4a7cff785453206925f8b9.diff

LOG: Reland "[PowerPC] Remove extend between shift and and"

The commit originally caused a bootstrap failure on the big-endian
PPC bot because the combine interfered with the legalizer when it
was applied to illegal types. This update restricts the combine to
the only types for which it is actually needed (an i32 value extended
to i64). Tested locally with a PPC BE bootstrap.

Added: 
    llvm/test/CodeGen/PowerPC/and-extend-combine.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 6cf0ccf8086a9b..b13fcd315e1aed 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1381,8 +1381,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
   // We have target-specific dag combine patterns for the following nodes:
-  setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
-                       ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
+                       ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
   if (Subtarget.hasFPCVT())
     setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -15496,6 +15496,30 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::ADD:
     return combineADD(N, DCI);
+  case ISD::AND: {
+    // We don't want (and (zext (shift...)), C) if C fits in the width of the
+    // original input as that will prevent us from selecting optimal rotates.
+    // This only matters if the input to the extend is i32 widened to i64.
+    SDValue Op1 = N->getOperand(0);
+    SDValue Op2 = N->getOperand(1);
+    if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
+         Op1.getOpcode() != ISD::ANY_EXTEND) ||
+        !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
+        Op1.getOperand(0).getValueType() != MVT::i32)
+      break;
+    SDValue NarrowOp = Op1.getOperand(0);
+    if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
+        NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
+      break;
+    uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+    // Make sure that the constant is narrow enough to fit in the narrow type.
+    if (!isUInt<32>(Imm))
+      break;
+    SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
+    SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
+    // Imm has no high bits, so the i64 AND zeroed bits 32-63; zext keeps that.
+    return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
+  }
   case ISD::SHL:
     return combineSHL(N, DCI);
   case ISD::SRA:

diff  --git a/llvm/test/CodeGen/PowerPC/and-extend-combine.ll b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
new file mode 100644
index 00000000000000..b05d0097154a55
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
+; RUN:   -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
+; The i32 lshr+and result is zero-extended to i64 and used as a load offset;
+; the CHECK lines expect the shift and mask to be selected as one rlwinm.
+define dso_local ptr @foo(i32 noundef zeroext %arg, ptr nocapture noundef readonly %arg1, ptr noundef writeonly %arg2) local_unnamed_addr {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    rlwinm r3, r3, 31, 17, 28
+; CHECK-NEXT:    ldx r4, r4, r3
+; CHECK-NEXT:    clrldi r3, r4, 56
+; CHECK-NEXT:    add r3, r5, r3
+; CHECK-NEXT:    std r4, 0(r5)
+; CHECK-NEXT:    blr
+bb:
+  %i = lshr i32 %arg, 1
+  %i3 = and i32 %i, 32760
+  %i4 = zext i32 %i3 to i64
+  %i5 = getelementptr inbounds i8, ptr %arg1, i64 %i4
+  %i6 = load i64, ptr %i5, align 8
+  %i7 = and i64 %i6, 255
+  store i64 %i6, ptr %arg2, align 8
+  %i8 = getelementptr inbounds i8, ptr %arg2, i64 %i7
+  ret ptr %i8
+}