[llvm] [WIP][GISel][RISCV] Implement selectShiftMask. (PR #77572)
Mikhail Gudim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 11 13:30:11 PST 2024
https://github.com/mgudim updated https://github.com/llvm/llvm-project/pull/77572
>From b22fe66d5a72bfb3f552dbe861cc5ad5a608c422 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at gmail.com>
Date: Wed, 10 Jan 2024 04:47:46 -0500
Subject: [PATCH] [WIP][GISel][RISCV] Implement selectShiftMask.
---
.../RISCV/GISel/RISCVInstructionSelector.cpp | 74 +++++-
.../instruction-select/shift-rv32.mir | 161 +++++++++++++
.../instruction-select/shift-rv64.mir | 214 ++++++++++++++++++
3 files changed, 446 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv32.mir
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv64.mir
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 61bdbfc47d947f..705bfd8869da44 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -159,9 +159,77 @@ RISCVInstructionSelector::RISCVInstructionSelector(
InstructionSelector::ComplexRendererFns
RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const {
- // TODO: Also check if we are seeing the result of an AND operation which
- // could be bypassed since we only check the lower log2(xlen) bits.
- return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
+  // Match the shift-amount operand of a shift, looking through operations
+  // that cannot change the low log2(ShiftWidth) bits the shift actually
+  // reads: zext, a covering AND mask, and ADD/SUB of suitable constants.
+  if (!Root.isReg())
+    return std::nullopt;
+
+  using namespace llvm::MIPatternMatch;
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  Register RootReg = Root.getReg();
+  Register ShAmtReg = RootReg;
+  const LLT ShiftLLT = MRI.getType(RootReg);
+  unsigned ShiftWidth = ShiftLLT.getSizeInBits();
+  assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+  // Peek through zext: only the low bits of the narrower source matter.
+  Register ZExtSrcReg;
+  if (mi_match(ShAmtReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
+    ShAmtReg = ZExtSrcReg;
+
+  // An AND whose mask covers all of the low log2(ShiftWidth) bits is a
+  // no-op for the shift and can be bypassed.
+  APInt AndMask;
+  Register AndSrcReg;
+  if (mi_match(ShAmtReg, MRI, m_GAnd(m_Reg(AndSrcReg), m_ICst(AndMask)))) {
+    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
+    if (ShMask.isSubsetOf(AndMask))
+      ShAmtReg = AndSrcReg;
+    // TODO: SimplifyDemandedBits may have optimized the mask, so try
+    // restoring any bits that are known zero before giving up.
+  }
+
+  APInt Imm;
+  Register Reg;
+  if (mi_match(ShAmtReg, MRI, m_GAdd(m_Reg(Reg), m_ICst(Imm)))) {
+    if (Imm != 0 && Imm.urem(ShiftWidth) == 0)
+      // If we are shifting by X+N where N == 0 mod Size, then just shift by X
+      // to avoid the ADD.
+      ShAmtReg = Reg;
+  } else if (mi_match(ShAmtReg, MRI, m_GSub(m_ICst(Imm), m_Reg(Reg)))) {
+    if (Imm != 0 && Imm.urem(ShiftWidth) == 0) {
+      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
+      // to generate a NEG instead of a SUB of a constant. The temporary must
+      // be a GPR-class register, not a generic vreg: SUB/SUBW is a target
+      // instruction, and a generic vreg def would fail the machine verifier.
+      ShAmtReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+      unsigned NegOpc = Subtarget->is64Bit() ? RISCV::SUBW : RISCV::SUB;
+      return {{[=](MachineInstrBuilder &MIB) {
+        MachineIRBuilder(*MIB.getInstr())
+            .buildInstr(NegOpc, {ShAmtReg}, {Register(RISCV::X0), Reg});
+        MIB.addReg(ShAmtReg);
+      }}};
+    }
+    // Imm.urem(ShiftWidth) is already less than ShiftWidth, so compare it
+    // directly; no extra "& (ShiftWidth - 1)" is needed.
+    if (Imm.urem(ShiftWidth) == ShiftWidth - 1) {
+      // If we are shifting by N-X where N == -1 mod Size, then just shift by
+      // ~X to generate a NOT instead of a SUB of a constant.
+      ShAmtReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+      return {{[=](MachineInstrBuilder &MIB) {
+        MachineIRBuilder(*MIB.getInstr())
+            .buildInstr(RISCV::XORI, {ShAmtReg}, {Reg})
+            .addImm(-1);
+        MIB.addReg(ShAmtReg);
+      }}};
+    }
+  }
+
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(ShAmtReg); }}};
}
InstructionSelector::ComplexRendererFns
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv32.mir
new file mode 100644
index 00000000000000..3cbd30cc2da82f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv32.mir
@@ -0,0 +1,161 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=riscv32 -run-pass=instruction-select \
+# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+# This 32-bit version has no zext test because on RV32 there is no narrower
+# legal scalar type from which a shift amount could be zero-extended.
+---
+name: shl
+# Baseline: an unmodified shift amount is used directly by SLL.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %3:gprb(s32) = G_SHL %0, %1
+ $x10 = COPY %3(s32)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_and
+# The AND mask 31 covers all 5 shift-amount bits SLL reads on RV32, so the
+# G_AND is bypassed and no ANDI is emitted.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_and
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = G_CONSTANT i32 31
+ %3:gprb(s32) = G_AND %1, %2
+ %4:gprb(s32) = G_SHL %0, %3(s32)
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_add
+# Adding 32 (== 0 mod the 32-bit shift width) cannot change the low 5 bits
+# of the amount, so the G_ADD is bypassed.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_add
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = G_CONSTANT i32 32
+ %3:gprb(s32) = G_ADD %1, %2
+ %4:gprb(s32) = G_SHL %0, %3(s32)
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_sub
+# 32 - X is congruent to -X mod 32, so the SUB-from-constant becomes a
+# NEG (SUB x0, X).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_sub
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[COPY1]]
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[SUB]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = G_CONSTANT i32 32
+ %3:gprb(s32) = G_SUB %2, %1
+ %4:gprb(s32) = G_SHL %0, %3(s32)
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_bitwise_not
+# -1 - X equals ~X, so the SUB-from-constant becomes a NOT (XORI X, -1).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_bitwise_not
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[COPY1]], -1
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[XORI]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = G_CONSTANT i32 -1
+ %3:gprb(s32) = G_SUB %2, %1
+ %4:gprb(s32) = G_SHL %0, %3(s32)
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_bitwise_not_2
+# 31 - X is congruent to -1 - X mod 32, so this also selects as a NOT
+# (XORI X, -1).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_bitwise_not_2
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[COPY1]], -1
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[XORI]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = G_CONSTANT i32 31
+ %3:gprb(s32) = G_SUB %2, %1
+ %4:gprb(s32) = G_SHL %0, %3(s32)
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv64.mir
new file mode 100644
index 00000000000000..5043a24f3a3c03
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/shift-rv64.mir
@@ -0,0 +1,214 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=riscv64 -run-pass=instruction-select \
+# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: shl
+# Baseline: an unmodified shift amount is used directly by SLL.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %3:gprb(s64) = G_SHL %0, %1
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_zext
+# The G_ZEXT of the shift amount is peeked through, so the s32 constant 1
+# is materialized directly (ADDI x0, 1) and fed to SLL.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: shl_zext
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[ADDI]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s32) = G_CONSTANT i32 1
+ %2:gprb(s64) = G_ZEXT %1
+ %3:gprb(s64) = G_SHL %0, %2(s64)
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_and
+# The AND mask 63 covers all 6 shift-amount bits SLL reads on RV64, so the
+# G_AND is bypassed and no ANDI is emitted.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_and
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = G_CONSTANT i64 63
+ %3:gprb(s64) = G_AND %1, %2
+ %4:gprb(s64) = G_SHL %0, %3(s64)
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_add
+# Adding 64 (== 0 mod the 64-bit shift width) cannot change the low 6 bits
+# of the amount, so the G_ADD is bypassed.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_add
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = G_CONSTANT i64 64
+ %3:gprb(s64) = G_ADD %1, %2
+ %4:gprb(s64) = G_SHL %0, %3(s64)
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_sub
+# 64 - X is congruent to -X mod 64, so the SUB-from-constant becomes a NEG
+# (SUBW x0, X on RV64; only the low 6 bits of the result are read by SLL).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_sub
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[SUBW:%[0-9]+]]:gpr = SUBW $x0, [[COPY1]]
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[SUBW]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = G_CONSTANT i64 64
+ %3:gprb(s64) = G_SUB %2, %1
+ %4:gprb(s64) = G_SHL %0, %3(s64)
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_bitwise_not
+# -1 - X equals ~X, so the SUB-from-constant becomes a NOT (XORI X, -1).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_bitwise_not
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[COPY1]], -1
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[XORI]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = G_CONSTANT i64 -1
+ %3:gprb(s64) = G_SUB %2, %1
+ %4:gprb(s64) = G_SHL %0, %3(s64)
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_bitwise_not_2
+# 63 - X is congruent to -1 - X mod 64, so this also selects as a NOT
+# (XORI X, -1).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_bitwise_not_2
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[COPY1]], -1
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[XORI]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = G_CONSTANT i64 63
+ %3:gprb(s64) = G_SUB %2, %1
+ %4:gprb(s64) = G_SHL %0, %3(s64)
+ $x10 = COPY %4(s64)
+ PseudoRET implicit $x10
+...
+
+---
+name: shl_and_zext
+# Combined case: the G_ZEXT is peeked through and the AND mask 63 covers
+# all 6 demanded amount bits, so the LBU result feeds SLL directly.
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: shl_and_zext
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %addr:gpr = COPY $x11
+ ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU %addr, 0 :: (load (s8))
+ ; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[LBU]]
+ ; CHECK-NEXT: $x10 = COPY [[SLL]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %addr:gprb(p0) = COPY $x11
+ %1:gprb(s32) = G_LOAD %addr(p0) :: (load (s8))
+ %2:gprb(s32) = G_CONSTANT i32 63
+ %3:gprb(s32) = G_AND %1, %2
+ %4:gprb(s64) = G_ZEXT %3
+ %5:gprb(s64) = G_SHL %0, %4(s64)
+ $x10 = COPY %5(s64)
+ PseudoRET implicit $x10
+...
More information about the llvm-commits
mailing list