[llvm] 847235b - [RISCV] Add DAG combine to turn (sub (shl X, 8), X) into orc.b (#96680)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 25 15:55:13 PDT 2024
Author: Craig Topper
Date: 2024-06-25T15:55:09-07:00
New Revision: 847235bbef153c6d805d415cea8494297eaf2bdb
URL: https://github.com/llvm/llvm-project/commit/847235bbef153c6d805d415cea8494297eaf2bdb
DIFF: https://github.com/llvm/llvm-project/commit/847235bbef153c6d805d415cea8494297eaf2bdb.diff
LOG: [RISCV] Add DAG combine to turn (sub (shl X, 8), X) into orc.b (#96680)
This transform applies only when bits 0, 8, 16, 24, 32, etc. of X are the only bits that can be non-zero.
This is what (mul X, 255) is decomposed to. This decomposition happens
early before RISC-V DAG combine runs.
This patch does not support types larger than XLen so i64 on rv32 fails
to generate 2 orc.b instructions. It might have worked if the mul hadn't
been decomposed before it was expanded.
Partial fix for #96595.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rv32zbb.ll
llvm/test/CodeGen/RISCV/rv64zbb.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7c7f167821234..9e988783e2eb6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12502,12 +12502,15 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
}
break;
}
- case RISCVISD::BREV8: {
+ case RISCVISD::BREV8:
+ case RISCVISD::ORC_B: {
MVT VT = N->getSimpleValueType(0);
MVT XLenVT = Subtarget.getXLenVT();
assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
"Unexpected custom legalisation");
- assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
+ assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
+ (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
+ "Unexpected extension");
SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
// ReplaceNodeResults requires we maintain the same type for the return
@@ -13345,6 +13348,35 @@ static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
}
+// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
+// non-zero. Replace with orc.b.
+static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.hasStdExtZbb())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
+ return SDValue();
+
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!ShAmtC || ShAmtC->getZExtValue() != 8)
+ return SDValue();
+
+ APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
+ if (!DAG.MaskedValueIsZero(N1, Mask))
+ return SDValue();
+
+ return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
+}
+
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue V = combineSubOfBoolean(N, DAG))
@@ -13367,6 +13399,8 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineBinOpOfZExt(N, DAG))
return V;
+ if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
+ return V;
// fold (sub x, (select lhs, rhs, cc, 0, y)) ->
// (select lhs, rhs, cc, x, (sub x, y))
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index f25aa0de89da8..cb9fc6c16333e 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1356,3 +1356,64 @@ define i64 @bswap_i64(i64 %a) {
%1 = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %1
}
+
+define i16 @orc_b_i16(i16 %a) {
+; RV32I-LABEL: orc_b_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 257
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: orc_b_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a0, a0, 257
+; RV32ZBB-NEXT: orc.b a0, a0
+; RV32ZBB-NEXT: ret
+ %1 = and i16 %a, 257
+ %2 = mul nuw i16 %1, 255
+ ret i16 %2
+}
+
+define i32 @orc_b_i32(i32 %a) {
+; RV32I-LABEL: orc_b_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi a1, a1, 257
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: orc_b_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: lui a1, 4112
+; RV32ZBB-NEXT: addi a1, a1, 257
+; RV32ZBB-NEXT: and a0, a0, a1
+; RV32ZBB-NEXT: orc.b a0, a0
+; RV32ZBB-NEXT: ret
+ %1 = and i32 %a, 16843009
+ %2 = mul nuw i32 %1, 255
+ ret i32 %2
+}
+
+define i64 @orc_b_i64(i64 %a) {
+; CHECK-LABEL: orc_b_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a2, 4112
+; CHECK-NEXT: addi a2, a2, 257
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: slli a2, a0, 8
+; CHECK-NEXT: sltu a3, a2, a0
+; CHECK-NEXT: srli a4, a0, 24
+; CHECK-NEXT: slli a5, a1, 8
+; CHECK-NEXT: or a4, a5, a4
+; CHECK-NEXT: sub a1, a4, a1
+; CHECK-NEXT: sub a1, a1, a3
+; CHECK-NEXT: sub a0, a2, a0
+; CHECK-NEXT: ret
+ %1 = and i64 %a, 72340172838076673
+ %2 = mul nuw i64 %1, 255
+ ret i64 %2
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 4d5ef5db86057..6c354cc1b446b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1494,3 +1494,69 @@ define i64 @bswap_i64(i64 %a) {
%1 = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %1
}
+
+define i16 @orc_b_i16(i16 %a) {
+; RV64I-LABEL: orc_b_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 257
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: orc_b_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a0, a0, 257
+; RV64ZBB-NEXT: orc.b a0, a0
+; RV64ZBB-NEXT: ret
+ %1 = and i16 %a, 257
+ %2 = mul nuw i16 %1, 255
+ ret i16 %2
+}
+
+define i32 @orc_b_i32(i32 %a) {
+; RV64I-LABEL: orc_b_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: subw a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: orc_b_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, 4112
+; RV64ZBB-NEXT: addiw a1, a1, 257
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: orc.b a0, a0
+; RV64ZBB-NEXT: ret
+ %1 = and i32 %a, 16843009
+ %2 = mul nuw i32 %1, 255
+ ret i32 %2
+}
+
+define i64 @orc_b_i64(i64 %a) {
+; RV64I-LABEL: orc_b_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a2, a1, 32
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: orc_b_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, 4112
+; RV64ZBB-NEXT: addiw a1, a1, 257
+; RV64ZBB-NEXT: slli a2, a1, 32
+; RV64ZBB-NEXT: add a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: orc.b a0, a0
+; RV64ZBB-NEXT: ret
+ %1 = and i64 %a, 72340172838076673
+ %2 = mul nuw i64 %1, 255
+ ret i64 %2
+}
More information about the llvm-commits
mailing list