[llvm] [RISCV] Porting hasAllNBitUsers to RISCV GISel for instruction select (PR #124678)
Luke Quinn via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 06:37:30 PST 2025
https://github.com/lquinn2015 updated https://github.com/llvm/llvm-project/pull/124678
>From 47591077eea12f924af67c77e87a33d87a43ff76 Mon Sep 17 00:00:00 2001
From: Luke Quinn <quic_lquinn at quicinc.com>
Date: Wed, 22 Jan 2025 08:30:40 -0800
Subject: [PATCH 1/2] [RISCV] Add GISelPredicateCode bindings and
hasAllNBitUsers prototypes as staging for full GlobalISel support
Signed-off-by: Luke Quinn <quic_lquinn at quicinc.com>
---
.../Target/RISCV/GISel/RISCVInstructionSelector.cpp | 10 ++++++++++
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 8 ++++++--
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 3f1539da4a9c84..5ea9f728ff995e 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -57,6 +57,12 @@ class RISCVInstructionSelector : public InstructionSelector {
const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) const;
+ // const MachineInstr &MI
+ bool hasAllNBitUsers(const MachineInstr &MI, unsigned Bits, const unsigned Depth = 0) const;
+ bool hasAllBUsers(const MachineInstr &MI) const { return hasAllNBitUsers(MI, 8); }
+ bool hasAllHUsers(const MachineInstr &MI) const { return hasAllNBitUsers(MI, 16); }
+ bool hasAllWUsers(const MachineInstr &MI) const { return hasAllNBitUsers(MI, 32); }
+
bool isRegInGprb(Register Reg) const;
bool isRegInFprb(Register Reg) const;
@@ -186,6 +192,10 @@ RISCVInstructionSelector::RISCVInstructionSelector(
{
}
+bool RISCVInstructionSelector::hasAllNBitUsers(const MachineInstr &MI, unsigned Bits, const unsigned Depth) const {
+ return false;
+};
+
InstructionSelector::ComplexRendererFns
RISCVInstructionSelector::selectShiftMask(MachineOperand &Root,
unsigned ShiftWidth) const {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index bb5bb6352c32a5..fbfc354daa2f29 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1945,7 +1945,9 @@ class binop_allhusers<SDPatternOperator operator>
: PatFrag<(ops node:$lhs, node:$rhs),
(XLenVT (operator node:$lhs, node:$rhs)), [{
return hasAllHUsers(Node);
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return hasAllHUsers(MI); }];
+}
// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
// if only the lower 32 bits of their result is used.
@@ -1953,7 +1955,9 @@ class binop_allwusers<SDPatternOperator operator>
: PatFrag<(ops node:$lhs, node:$rhs),
(i64 (operator node:$lhs, node:$rhs)), [{
return hasAllWUsers(Node);
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return hasAllWUsers(MI); }];
+}
def sexti32_allwusers : PatFrag<(ops node:$src),
(sext_inreg node:$src, i32), [{
>From 0e97975b63241a83cf71cbb05cf6f08009eb9aec Mon Sep 17 00:00:00 2001
From: Luke Quinn <quic_lquinn at quicinc.com>
Date: Mon, 27 Jan 2025 18:05:35 -0800
Subject: [PATCH 2/2] [RISCV] Implement hasAllNBitUsers for GlobalISel. This
allows packw and other narrow W-form instructions (e.g. addw, subw) to be
selected.
Signed-off-by: Luke Quinn <quic_lquinn at quicinc.com>
---
.../RISCV/GISel/RISCVInstructionSelector.cpp | 178 +++++++++++++++++-
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 8 +-
llvm/test/CodeGen/RISCV/GlobalISel/combine.ll | 2 +-
.../CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll | 16 +-
llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll | 76 ++++----
.../test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll | 15 +-
6 files changed, 227 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 5ea9f728ff995e..7c09b859948061 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -57,11 +57,20 @@ class RISCVInstructionSelector : public InstructionSelector {
const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) const;
+ static constexpr unsigned MaxRecursionDepth = 6; // depth at which hasAllNBitUsers stops recursing and returns false
+
// const MachineInstr &MI
- bool hasAllNBitUsers(const MachineInstr &MI, unsigned Bits, const unsigned Depth = 0) const;
- bool hasAllBUsers(const MachineInstr &MI) const { return hasAllNBitUsers(MI, 8); }
- bool hasAllHUsers(const MachineInstr &MI) const { return hasAllNBitUsers(MI, 16); }
- bool hasAllWUsers(const MachineInstr &MI) const { return hasAllNBitUsers(MI, 32); }
+ bool hasAllNBitUsers(const MachineInstr &MI, unsigned Bits,
+ const unsigned Depth = 0) const; // Depth is the recursion level; external callers rely on the default of 0
+ bool hasAllBUsers(const MachineInstr &MI) const { // hasAllNBitUsers query fixed at Bits = 8
+ return hasAllNBitUsers(MI, 8);
+ }
+ bool hasAllHUsers(const MachineInstr &MI) const { // hasAllNBitUsers query fixed at Bits = 16
+ return hasAllNBitUsers(MI, 16);
+ }
+ bool hasAllWUsers(const MachineInstr &MI) const { // hasAllNBitUsers query fixed at Bits = 32
+ return hasAllNBitUsers(MI, 32);
+ }
bool isRegInGprb(Register Reg) const;
bool isRegInFprb(Register Reg) const;
@@ -192,9 +201,166 @@ RISCVInstructionSelector::RISCVInstructionSelector(
{
}
-bool RISCVInstructionSelector::hasAllNBitUsers(const MachineInstr &MI, unsigned Bits, const unsigned Depth) const {
+bool RISCVInstructionSelector::hasAllNBitUsers(const MachineInstr &MI,
+ unsigned Bits,
+ const unsigned Depth) const {
+ // Conservative query: true only if every non-debug user of MI's result needs just its low Bits bits.
+ assert((MI.getOpcode() == TargetOpcode::G_ADD ||
+ MI.getOpcode() == TargetOpcode::G_SUB ||
+ MI.getOpcode() == TargetOpcode::G_MUL ||
+ MI.getOpcode() == TargetOpcode::G_SHL ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_AND ||
+ MI.getOpcode() == TargetOpcode::G_OR ||
+ MI.getOpcode() == TargetOpcode::G_XOR ||
+ MI.getOpcode() == TargetOpcode::G_SEXT_INREG || Depth != 0) &&
+ "Unexpected opcode"); // the opcode list is enforced only for the outermost call (Depth == 0)
+
+ if (Depth >= RISCVInstructionSelector::MaxRecursionDepth) // recursion budget exhausted: give up conservatively
return false;
-};
+
+ auto DestReg = MI.getOperand(0).getReg();
+ for (auto &UserOp : MRI->use_nodbg_operands(DestReg)) { // debug uses are not considered
+ assert(UserOp.getParent() && "UserOp must have a parent");
+ const MachineInstr &UserMI = *UserOp.getParent();
+ unsigned OpIdx = UserOp.getOperandNo(); // position of this use within UserMI's operand list
+
+ switch (UserMI.getOpcode()) {
+ default: // any user opcode not listed below fails the query
+ return false;
+ case RISCV::ADDW:
+ case RISCV::ADDIW:
+ case RISCV::SUBW:
+ case RISCV::MULW:
+ case RISCV::SLLW:
+ case RISCV::SLLIW:
+ case RISCV::SRAW:
+ case RISCV::SRAIW:
+ case RISCV::SRLW:
+ case RISCV::SRLIW:
+ case RISCV::DIVW:
+ case RISCV::DIVUW:
+ case RISCV::REMW:
+ case RISCV::REMUW:
+ case RISCV::ROLW:
+ case RISCV::RORW:
+ case RISCV::RORIW:
+ case RISCV::CLZW:
+ case RISCV::CTZW:
+ case RISCV::CPOPW:
+ case RISCV::SLLI_UW:
+ case RISCV::FMV_W_X:
+ case RISCV::FCVT_H_W:
+ case RISCV::FCVT_H_W_INX:
+ case RISCV::FCVT_H_WU:
+ case RISCV::FCVT_H_WU_INX:
+ case RISCV::FCVT_S_W:
+ case RISCV::FCVT_S_W_INX:
+ case RISCV::FCVT_S_WU:
+ case RISCV::FCVT_S_WU_INX:
+ case RISCV::FCVT_D_W:
+ case RISCV::FCVT_D_W_INX:
+ case RISCV::FCVT_D_WU:
+ case RISCV::FCVT_D_WU_INX:
+ case RISCV::TH_REVW:
+ case RISCV::TH_SRRIW:
+ if (Bits >= 32)
+ break;
+ return false;
+ case RISCV::SLL:
+ case RISCV::SRA:
+ case RISCV::SRL:
+ case RISCV::ROL:
+ case RISCV::ROR:
+ case RISCV::BSET:
+ case RISCV::BCLR:
+ case RISCV::BINV:
+ // Shift amount operands only use log2(Xlen) bits.
+ if (OpIdx == 2 && Bits >= Log2_32(Subtarget->getXLen()))
+ break;
+ return false;
+ case RISCV::SLLI:
+ // SLLI only uses the lower (XLen - ShAmt) bits.
+ if (Bits >= Subtarget->getXLen() - UserMI.getOperand(2).getImm())
+ break;
+ return false;
+ case RISCV::ANDI:
+ if (Bits >= (unsigned)llvm::bit_width<uint64_t>(
+ (uint64_t)UserMI.getOperand(2).getImm()))
+ break;
+ goto RecCheck; // mask reaches above Bits: fall back to checking ANDI's own users
+ case RISCV::ORI: {
+ uint64_t Imm = UserMI.getOperand(2).getImm();
+ if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) // every zero bit of Imm lies below bit position Bits
+ break;
+ [[fallthrough]]; // otherwise ORI is decided by the recursive check below
+ }
+ case RISCV::AND:
+ case RISCV::OR:
+ case RISCV::XOR:
+ case RISCV::XORI:
+ case RISCV::ANDN:
+ case RISCV::ORN:
+ case RISCV::XNOR:
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD:
+ RecCheck: // these users qualify only if their own results pass the same query one level deeper
+ if (hasAllNBitUsers(UserMI, Bits, Depth + 1))
+ break;
+ return false;
+ case RISCV::SRLI: {
+ unsigned ShAmt = UserMI.getOperand(2).getImm();
+ // If we are shifting right by less than Bits, and users don't demand any
+ // bits that were shifted into [Bits-1:0], then we can consider this as an
+ // N-Bit user.
+ if (Bits > ShAmt && hasAllNBitUsers(UserMI, Bits - ShAmt, Depth + 1))
+ break;
+ return false;
+ }
+ case RISCV::SEXT_B:
+ case RISCV::PACKH:
+ if (Bits >= 8)
+ break;
+ return false;
+ case RISCV::SEXT_H:
+ case RISCV::FMV_H_X:
+ case RISCV::ZEXT_H_RV32:
+ case RISCV::ZEXT_H_RV64:
+ case RISCV::PACKW:
+ if (Bits >= 16)
+ break;
+ return false;
+ case RISCV::PACK:
+ if (Bits >= (Subtarget->getXLen() / 2))
+ break;
+ return false;
+ case RISCV::ADD_UW:
+ case RISCV::SH1ADD_UW:
+ case RISCV::SH2ADD_UW:
+ case RISCV::SH3ADD_UW:
+ // The first operand to add.uw/shXadd.uw is implicitly zero extended from
+ // 32 bits.
+ if (OpIdx == 1 && Bits >= 32)
+ break;
+ return false;
+ case RISCV::SB:
+ if (OpIdx == 0 && Bits >= 8) // NOTE(review): operand 0 is presumably the stored value - confirm
+ break;
+ return false;
+ case RISCV::SH:
+ if (OpIdx == 0 && Bits >= 16)
+ break;
+ return false;
+ case RISCV::SW:
+ if (OpIdx == 0 && Bits >= 32)
+ break;
+ return false;
+ }
+ }
+
+ return true; // no use failed its check (also reached when there are no non-debug uses)
+}
InstructionSelector::ComplexRendererFns
RISCVInstructionSelector::selectShiftMask(MachineOperand &Root,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index fbfc354daa2f29..00458a3b54c82a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1943,17 +1943,17 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
class binop_allhusers<SDPatternOperator operator>
: PatFrag<(ops node:$lhs, node:$rhs),
- (XLenVT (operator node:$lhs, node:$rhs)), [{
+ (XLenVT(operator node:$lhs, node:$rhs)), [{
return hasAllHUsers(Node);
}]> {
- let GISelPredicateCode = [{ return hasAllHUsers(MI); }];
+ let GISelPredicateCode = [{ return hasAllHUsers(MI); }];
}
// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
// if only the lower 32 bits of their result is used.
class binop_allwusers<SDPatternOperator operator>
- : PatFrag<(ops node:$lhs, node:$rhs),
- (i64 (operator node:$lhs, node:$rhs)), [{
+ : PatFrag<(ops node:$lhs, node:$rhs), (i64(operator node:$lhs, node:$rhs)),
+ [{
return hasAllWUsers(Node);
}]> {
let GISelPredicateCode = [{ return hasAllWUsers(MI); }];
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine.ll
index 360e84d37ec858..61d1fa5a5b9f4b 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/combine.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine.ll
@@ -20,7 +20,7 @@ define i32 @constant_to_rhs(i32 %x) {
; RV64-O0: # %bb.0:
; RV64-O0-NEXT: mv a1, a0
; RV64-O0-NEXT: li a0, 1
-; RV64-O0-NEXT: add a0, a0, a1
+; RV64-O0-NEXT: addw a0, a0, a1
; RV64-O0-NEXT: sext.w a0, a0
; RV64-O0-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
index a29219bfde06bb..79d08772e88532 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
@@ -107,7 +107,7 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: rol_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: neg a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -125,7 +125,7 @@ define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
define void @rol_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
; RV64I-LABEL: rol_i32_nosext:
; RV64I: # %bb.0:
-; RV64I-NEXT: neg a3, a1
+; RV64I-NEXT: negw a3, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
@@ -146,7 +146,7 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
; RV64I-LABEL: rol_i32_neg_constant_rhs:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -2
-; RV64I-NEXT: neg a2, a0
+; RV64I-NEXT: negw a2, a0
; RV64I-NEXT: sllw a0, a1, a0
; RV64I-NEXT: srlw a1, a1, a2
; RV64I-NEXT: or a0, a0, a1
@@ -166,7 +166,7 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: rol_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: neg a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -185,7 +185,7 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: ror_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: neg a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -203,7 +203,7 @@ define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
define void @ror_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
; RV64I-LABEL: ror_i32_nosext:
; RV64I: # %bb.0:
-; RV64I-NEXT: neg a3, a1
+; RV64I-NEXT: negw a3, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
@@ -224,7 +224,7 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
; RV64I-LABEL: ror_i32_neg_constant_rhs:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -2
-; RV64I-NEXT: neg a2, a0
+; RV64I-NEXT: negw a2, a0
; RV64I-NEXT: srlw a0, a1, a0
; RV64I-NEXT: sllw a1, a1, a2
; RV64I-NEXT: or a0, a0, a1
@@ -244,7 +244,7 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: ror_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: neg a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
index 9df319e73a11a3..9a6c718703a27a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
@@ -31,13 +31,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -88,13 +88,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -103,7 +103,7 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
@@ -153,13 +153,13 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -168,7 +168,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: sub a1, a1, a0
+; RV64I-NEXT: subw a1, a1, a0
; RV64I-NEXT: .LBB2_2: # %cond.end
; RV64I-NEXT: subw a0, s0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -212,13 +212,13 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -283,13 +283,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -412,13 +412,13 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -455,13 +455,13 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -497,13 +497,13 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -553,13 +553,13 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -672,13 +672,13 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -709,13 +709,13 @@ define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
@@ -750,13 +750,13 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 61681
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: sraiw a1, a0, 4
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a2, a2, -241
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll
index bf430c618afca2..558424b53be951 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll
@@ -16,9 +16,7 @@ define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
;
; RV64ZBKB-LABEL: pack_i32:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: zext.h a0, a0
-; RV64ZBKB-NEXT: slliw a1, a1, 16
-; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: ret
%shl = and i32 %a, 65535
%shl1 = shl i32 %b, 16
@@ -37,9 +35,7 @@ define signext i32 @pack_i32_2(i16 zeroext %a, i16 zeroext %b) nounwind {
;
; RV64ZBKB-LABEL: pack_i32_2:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: slli a1, a1, 16
-; RV64ZBKB-NEXT: or a0, a1, a0
-; RV64ZBKB-NEXT: sext.w a0, a0
+; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: ret
%zexta = zext i16 %a to i32
%zextb = zext i16 %b to i32
@@ -60,8 +56,7 @@ define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) {
;
; RV64ZBKB-LABEL: pack_i32_3:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: slli a0, a0, 16
-; RV64ZBKB-NEXT: or a0, a0, a1
+; RV64ZBKB-NEXT: packw a0, a1, a0
; RV64ZBKB-NEXT: addw a0, a0, a2
; RV64ZBKB-NEXT: ret
%4 = zext i16 %0 to i32
@@ -343,9 +338,7 @@ define signext i32 @pack_i32_allWUsers(i16 zeroext %0, i16 zeroext %1, i16 zeroe
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: add a0, a1, a0
; RV64ZBKB-NEXT: zext.h a0, a0
-; RV64ZBKB-NEXT: slli a0, a0, 16
-; RV64ZBKB-NEXT: or a0, a0, a2
-; RV64ZBKB-NEXT: sext.w a0, a0
+; RV64ZBKB-NEXT: packw a0, a2, a0
; RV64ZBKB-NEXT: ret
%4 = add i16 %1, %0
%5 = zext i16 %4 to i32
More information about the llvm-commits
mailing list