[llvm] 1097ee6 - [RISCV] Optimize (srl (and X, 0xffff), C) -> (srli (slli X, 16), 16 + C).

Mon Feb 1 09:38:12 PST 2021

Author: Craig Topper
Date: 2021-02-01T09:37:55-08:00
New Revision: 1097ee61bf3ef4f36a7e92b4d8781a5c069c93de

URL: https://github.com/llvm/llvm-project/commit/1097ee61bf3ef4f36a7e92b4d8781a5c069c93de
DIFF: https://github.com/llvm/llvm-project/commit/1097ee61bf3ef4f36a7e92b4d8781a5c069c93de.diff

LOG: [RISCV] Optimize (srl (and X, 0xffff), C) -> (srli (slli X, 16), 16 + C).

Rather than materializing the 0xffff immediate for the AND, use
a shift left to remove the upper bits and then shift in zeros
from the right.

This pattern occurs when type legalizing an i16 right shift.

I've implemented this with custom selection code for a number of
reasons. I've limited this to the AND having a single use. We need
to compensate for SimplifyDemandedBits altering the AND mask. I'm
using *W opcodes on RV64. We may want to generlize this in the
future. For all these reason it seemed easiest to do it this way.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D95774

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/test/CodeGen/RISCV/alu16.ll
    llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 291051a7d976..9e6b11698d44 100644

--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -551,6 +551,39 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
     return;
   }
+  case ISD::SRL: {
+    // Optimize (srl (and X, 0xffff), C) -> (srli (slli X, 16), 16 + C).
+    // Taking into account that the 0xffff may have had lower bits removed by
+    // SimplifyDemandedBits.
+    // This avoids materializing the 0xffff immediate. This pattern occurs when
+    // type legalizing i16 right shifts.
+    // FIXME: This could be extended to other AND masks.
+    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (N1C) {
+      uint64_t ShAmt = N1C->getZExtValue();
+      SDValue N0 = Node->getOperand(0);
+      if (ShAmt < 16 && N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+          isa<ConstantSDNode>(N0.getOperand(1))) {
+        uint64_t Mask = N0.getConstantOperandVal(1);
+        Mask |= maskTrailingOnes<uint64_t>(ShAmt);
+        if (Mask == 0xffff) {
+          SDLoc DL(Node);
+          unsigned SLLOpc = Subtarget->is64Bit() ? RISCV::SLLIW : RISCV::SLLI;
+          unsigned SRLOpc = Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI;
+          SDNode *SLLI =
+              CurDAG->getMachineNode(SLLOpc, DL, VT, N0->getOperand(0),
+                                     CurDAG->getTargetConstant(16, DL, VT));
+          SDNode *SRLI = CurDAG->getMachineNode(
+              SRLOpc, DL, VT, SDValue(SLLI, 0),
+              CurDAG->getTargetConstant(16 + ShAmt, DL, VT));
+          ReplaceNode(Node, SRLI);
+          return;
+        }
+      }
+    }
+
+    break;
+  }
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {

diff  --git a/llvm/test/CodeGen/RISCV/alu16.ll b/llvm/test/CodeGen/RISCV/alu16.ll
index 15bf44e526e7..d795f154f841 100644
--- a/llvm/test/CodeGen/RISCV/alu16.ll
+++ b/llvm/test/CodeGen/RISCV/alu16.ll
@@ -121,18 +121,14 @@ define i16 @slli(i16 %a) nounwind {
 define i16 @srli(i16 %a) nounwind {
 ; RV32I-LABEL: srli:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi a1, a1, -64
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    srli a0, a0, 6
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 22
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: srli:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw a1, a1, -64
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    srli a0, a0, 6
+; RV64I-NEXT:    slliw a0, a0, 16
+; RV64I-NEXT:    srliw a0, a0, 22
 ; RV64I-NEXT:    ret
   %1 = lshr i16 %a, 6
   ret i16 %1

diff  --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index db8b289aa407..e92444147f5c 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -17,22 +17,18 @@ declare i32 @llvm.ctpop.i32(i32)
 define i16 @test_bswap_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bswap_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi a1, a1, -256
-; RV32I-NEXT:    and a1, a0, a1
-; RV32I-NEXT:    srli a1, a1, 8
-; RV32I-NEXT:    slli a0, a0, 8
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bswap_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw a1, a1, -256
-; RV64I-NEXT:    and a1, a0, a1
-; RV64I-NEXT:    srli a1, a1, 8
-; RV64I-NEXT:    slli a0, a0, 8
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    slliw a0, a0, 16
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
   %tmp = call i16 @llvm.bswap.i16(i16 %a)
   ret i16 %tmp