[llvm] [Mips] Optimize `or (and $src1, mask0), (shl $src2, mask1)` to `ins` (PR #103017)

Tue Aug 13 00:24:09 PDT 2024

https://github.com/yingopq created https://github.com/llvm/llvm-project/pull/103017

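Optimize `$dst = or (and $src1, (2**size0 - 1)), (shl $src2, size0)` to `ins $src1, $src2, pos, size`,
where `pos = size0` and `size = 32 - pos`. The `and` keeps only the low `size0` bits of `$src1` and the `shl` places the low `32 - size0` bits of `$src2` directly above them, so the `or` is exactly a bit-field insert.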

Fix #90325

>From e9459de01b53159865acfeb3c8513873edff8f2d Mon Sep 17 00:00:00 2001
From: Ying Huang <ying.huang at oss.cipunited.com>
Date: Fri, 2 Aug 2024 04:39:04 -0400
Subject: [PATCH] [Mips] Optimize `or (and $src1, mask0), (shl $src2, mask1)`
 to `ins`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Optimize `$dst = or (and $src1, (2**size0 - 1)), (shl $src2, size0)`
to `ins $src1, $src2, pos, size`, where `pos = size0` and `size = 32 - pos`.

Fix #90325
---
 llvm/lib/Target/Mips/MipsISelLowering.cpp | 34 ++++++++++++++++++++---
 llvm/test/CodeGen/Mips/ins.ll             | 22 +++++++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/Mips/ins.ll

diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0f2047fcac640e..30b3a526db2640 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -876,21 +876,47 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
-  // Pattern match INS.
-  //  $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
-  //  where mask1 = (2**size - 1) << pos, mask0 = ~mask1
-  //  => ins $dst, $src, size, pos, $src1
   if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert())
     return SDValue();
 
   SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
   unsigned SMPos0, SMSize0, SMPos1, SMSize1;
   ConstantSDNode *CN, *CN1;
+  uint64_t Pos = 0;
 
   // See if Op's first operand matches (and $src1 , mask0).
   if (And0.getOpcode() != ISD::AND)
     return SDValue();
 
+  if (And0.getOpcode() == ISD::AND && And1.getOpcode() == ISD::SHL) {
+    // Pattern match INS.
+    //   $dst = or (and $src1, (2**size0 - 1)), (shl $src2, size0)
+    //   ==> ins $src1, $src2, pos, size, pos = size0, size = 32 - pos;
+    if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+        !isShiftedMask_64(CN->getZExtValue(), SMPos0, SMSize0))
+      return SDValue();
+
+    if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))))
+      return SDValue();
+    Pos = CN->getZExtValue();
+
+    if (SMPos0 != 0 || SMSize0 != Pos || SMPos0 + SMSize0 > 32)
+      return SDValue();
+
+    SDLoc DL(N);
+    EVT ValTy = N->getValueType(0);
+    SMPos1 = Pos;
+    SMSize1 = 32 - SMPos1;
+    return DAG.getNode(MipsISD::Ins, DL, ValTy, And1.getOperand(0),
+                       DAG.getConstant(SMPos1, DL, MVT::i32),
+                       DAG.getConstant(SMSize1, DL, MVT::i32),
+                       And0.getOperand(0));
+  }
+
+  // Pattern match INS.
+  //  $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
+  //  where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+  //  => ins $dst, $src, size, pos, $src1
   if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
       !isShiftedMask_64(~CN->getSExtValue(), SMPos0, SMSize0))
     return SDValue();
diff --git a/llvm/test/CodeGen/Mips/ins.ll b/llvm/test/CodeGen/Mips/ins.ll
new file mode 100644
index 00000000000000..12cde981efae34
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ins.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O3 -mcpu=mips64r2 -mtriple=mips64el-unknown-linux-gnuabi64 < %s -o - | FileCheck %s
+
+define void @or_and_shl(ptr nocapture noundef %a, i64 noundef signext %b) {
+; CHECK-LABEL: or_and_shl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lw $1, 0($4)
+; CHECK-NEXT:    sll $2, $5, 0
+; CHECK-NEXT:    ins $1, $2, 31, 1
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    sw $1, 0($4)
+
+entry:
+  %conv = trunc i64 %b to i32
+  %load = load i32, ptr %a, align 4
+  %shl = shl i32 %conv, 31
+  %and = and i32 %load, 2147483647
+  %or = or i32 %and, %shl
+  store i32 %or, ptr %a, align 4
+  ret void
+}
+
+