[llvm] 4be1099 - [RISCV] Improve fixed vector handling in isCtpopFast. (#158380)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 09:47:13 PDT 2025
Author: Craig Topper
Date: 2025-09-16T09:47:09-07:00
New Revision: 4be1099607c97b9f28cd30d56149e7c6428c216c
URL: https://github.com/llvm/llvm-project/commit/4be1099607c97b9f28cd30d56149e7c6428c216c
DIFF: https://github.com/llvm/llvm-project/commit/4be1099607c97b9f28cd30d56149e7c6428c216c.diff
LOG: [RISCV] Improve fixed vector handling in isCtpopFast. (#158380)
Previously we considered fixed vectors fast if Zvbb or Zbb was
enabled. Zbb only helps if the vector type will end up being
scalarized.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rv32zbb.ll
llvm/test/CodeGen/RISCV/rv64zbb.ll
llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9d90eb0a65218..aa3a8697c0d55 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- return true;
+ return Subtarget.hasVInstructions();
case MVT::i64:
return Subtarget.hasVInstructionsI64();
case MVT::f16:
@@ -24820,12 +24820,16 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
}
bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
- if (VT.isScalableVector())
- return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
- if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
- return true;
- return Subtarget.hasCPOPLike() &&
- (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+ if (VT.isVector()) {
+ EVT SVT = VT.getVectorElementType();
+ // If the element type is legal we can use cpop.v if it is enabled.
+ if (isLegalElementTypeForRVV(SVT))
+ return Subtarget.hasStdExtZvbb();
+ // Don't consider it fast if the type needs to be legalized or scalarized.
+ return false;
+ }
+
+ return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
}
unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 3b3ef72e32aa7..a1a843a7c1ba7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -423,100 +423,62 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
}
define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ult_two:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ult_two:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ugt_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: snez a1, a1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ugt_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: xori a0, a0, 1
-; RV32ZBB-NEXT: xori a1, a1, 1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_eq_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: xor a1, a1, a3
-; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: sltu a1, a3, a1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: seqz a0, a0
-; RV32ZBB-NEXT: seqz a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ne_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: xor a1, a1, a3
-; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: sltu a1, a3, a1
-; RV32I-NEXT: xori a0, a0, 1
-; RV32I-NEXT: xori a1, a1, 1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: snez a0, a0
-; RV32ZBB-NEXT: snez a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
ret <2 x i1> %2
@@ -792,200 +754,130 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
}
define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ult_two:
-; RV32I: # %bb.0:
-; RV32I-NEXT: lw a1, 0(a0)
-; RV32I-NEXT: lw a2, 8(a0)
-; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: seqz a4, a1
-; RV32I-NEXT: seqz a5, a2
-; RV32I-NEXT: addi a6, a1, -1
-; RV32I-NEXT: addi a7, a2, -1
-; RV32I-NEXT: sub a4, a3, a4
-; RV32I-NEXT: sub a5, a0, a5
-; RV32I-NEXT: and a2, a2, a7
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a0, a0, a5
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: or a2, a2, a0
-; RV32I-NEXT: seqz a0, a1
-; RV32I-NEXT: seqz a1, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ult_two:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a0)
+; CHECK-NEXT: lw a2, 8(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a0, 12(a0)
+; CHECK-NEXT: seqz a4, a1
+; CHECK-NEXT: seqz a5, a2
+; CHECK-NEXT: addi a6, a1, -1
+; CHECK-NEXT: addi a7, a2, -1
+; CHECK-NEXT: sub a4, a3, a4
+; CHECK-NEXT: sub a5, a0, a5
+; CHECK-NEXT: and a2, a2, a7
+; CHECK-NEXT: and a1, a1, a6
+; CHECK-NEXT: and a0, a0, a5
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: or a2, a2, a0
+; CHECK-NEXT: seqz a0, a1
+; CHECK-NEXT: seqz a1, a2
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ugt_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: lw a1, 0(a0)
-; RV32I-NEXT: lw a2, 8(a0)
-; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: seqz a4, a1
-; RV32I-NEXT: seqz a5, a2
-; RV32I-NEXT: addi a6, a1, -1
-; RV32I-NEXT: addi a7, a2, -1
-; RV32I-NEXT: sub a4, a3, a4
-; RV32I-NEXT: sub a5, a0, a5
-; RV32I-NEXT: and a2, a2, a7
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a0, a0, a5
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: or a2, a2, a0
-; RV32I-NEXT: snez a0, a1
-; RV32I-NEXT: snez a1, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ugt_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: xori a0, a0, 1
-; RV32ZBB-NEXT: xori a1, a1, 1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a0)
+; CHECK-NEXT: lw a2, 8(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a0, 12(a0)
+; CHECK-NEXT: seqz a4, a1
+; CHECK-NEXT: seqz a5, a2
+; CHECK-NEXT: addi a6, a1, -1
+; CHECK-NEXT: addi a7, a2, -1
+; CHECK-NEXT: sub a4, a3, a4
+; CHECK-NEXT: sub a5, a0, a5
+; CHECK-NEXT: and a2, a2, a7
+; CHECK-NEXT: and a1, a1, a6
+; CHECK-NEXT: and a0, a0, a5
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: or a2, a2, a0
+; CHECK-NEXT: snez a0, a1
+; CHECK-NEXT: snez a1, a2
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_eq_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: lw a0, 0(a0)
-; RV32I-NEXT: lw a3, 4(a1)
-; RV32I-NEXT: lw a2, 12(a1)
-; RV32I-NEXT: beqz a3, .LBB22_3
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: sub a0, a3, a0
-; RV32I-NEXT: xor a3, a3, a0
-; RV32I-NEXT: sltu a0, a0, a3
-; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: bnez a2, .LBB22_4
-; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: addi a2, a1, -1
-; RV32I-NEXT: xor a1, a1, a2
-; RV32I-NEXT: sltu a1, a2, a1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB22_3:
-; RV32I-NEXT: addi a3, a0, -1
-; RV32I-NEXT: xor a0, a0, a3
-; RV32I-NEXT: sltu a0, a3, a0
-; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: beqz a2, .LBB22_2
-; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: sub a1, a2, a1
-; RV32I-NEXT: xor a2, a2, a1
-; RV32I-NEXT: sltu a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: seqz a0, a0
-; RV32ZBB-NEXT: seqz a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: lw a0, 0(a0)
+; CHECK-NEXT: lw a3, 4(a1)
+; CHECK-NEXT: lw a2, 12(a1)
+; CHECK-NEXT: beqz a3, .LBB22_3
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: sub a0, a3, a0
+; CHECK-NEXT: xor a3, a3, a0
+; CHECK-NEXT: sltu a0, a0, a3
+; CHECK-NEXT: lw a1, 8(a1)
+; CHECK-NEXT: bnez a2, .LBB22_4
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sltu a1, a2, a1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB22_3:
+; CHECK-NEXT: addi a3, a0, -1
+; CHECK-NEXT: xor a0, a0, a3
+; CHECK-NEXT: sltu a0, a3, a0
+; CHECK-NEXT: lw a1, 8(a1)
+; CHECK-NEXT: beqz a2, .LBB22_2
+; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: xor a2, a2, a1
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ne_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: lw a2, 0(a0)
-; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: lw a1, 12(a0)
-; RV32I-NEXT: beqz a3, .LBB23_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: seqz a2, a2
-; RV32I-NEXT: sub a2, a3, a2
-; RV32I-NEXT: xor a3, a3, a2
-; RV32I-NEXT: sltu a2, a2, a3
-; RV32I-NEXT: j .LBB23_3
-; RV32I-NEXT: .LBB23_2:
-; RV32I-NEXT: addi a3, a2, -1
-; RV32I-NEXT: xor a2, a2, a3
-; RV32I-NEXT: sltu a2, a3, a2
-; RV32I-NEXT: .LBB23_3:
-; RV32I-NEXT: lw a3, 8(a0)
-; RV32I-NEXT: xori a0, a2, 1
-; RV32I-NEXT: beqz a1, .LBB23_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: seqz a2, a3
-; RV32I-NEXT: sub a2, a1, a2
-; RV32I-NEXT: xor a1, a1, a2
-; RV32I-NEXT: sltu a1, a2, a1
-; RV32I-NEXT: xori a1, a1, 1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB23_5:
-; RV32I-NEXT: addi a1, a3, -1
-; RV32I-NEXT: xor a3, a3, a1
-; RV32I-NEXT: sltu a1, a1, a3
-; RV32I-NEXT: xori a1, a1, 1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: snez a0, a0
-; RV32ZBB-NEXT: snez a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a2, 0(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a1, 12(a0)
+; CHECK-NEXT: beqz a3, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: xor a3, a3, a2
+; CHECK-NEXT: sltu a2, a2, a3
+; CHECK-NEXT: j .LBB23_3
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: sltu a2, a3, a2
+; CHECK-NEXT: .LBB23_3:
+; CHECK-NEXT: lw a3, 8(a0)
+; CHECK-NEXT: xori a0, a2, 1
+; CHECK-NEXT: beqz a1, .LBB23_5
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: seqz a2, a3
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sltu a1, a2, a1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB23_5:
+; CHECK-NEXT: addi a1, a3, -1
+; CHECK-NEXT: xor a3, a3, a1
+; CHECK-NEXT: sltu a1, a1, a3
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
ret <2 x i1> %2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index d133f9d1db389..d8b7bfcbceb27 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -762,108 +762,70 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
}
define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ult_two:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ult_two:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ugt_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: snez a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ugt_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: xori a0, a0, 1
-; RV64ZBB-NEXT: xori a1, a1, 1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_eq_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a2, a0, -1
-; RV64I-NEXT: addiw a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_eq_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: seqz a0, a0
-; RV64ZBB-NEXT: seqz a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: addiw a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ne_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a2, a0, -1
-; RV64I-NEXT: addiw a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: xori a0, a0, 1
-; RV64I-NEXT: xori a1, a1, 1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ne_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: snez a0, a0
-; RV64ZBB-NEXT: snez a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: addiw a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
%1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
%2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
ret <2 x i1> %2
@@ -1052,100 +1014,62 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
}
define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ult_two:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ult_two:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ugt_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: snez a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ugt_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: xori a0, a0, 1
-; RV64ZBB-NEXT: xori a1, a1, 1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_eq_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_eq_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: seqz a0, a0
-; RV64ZBB-NEXT: seqz a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
ret <2 x i1> %2
}
define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ne_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: xori a0, a0, 1
-; RV64I-NEXT: xori a1, a1, 1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ne_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: snez a0, a0
-; RV64ZBB-NEXT: snez a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
%2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
ret <2 x i1> %2
diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..9c5df5f70fc15 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p 'require<profile-summary>,function(codegenprepare)' -S %s \
; RUN: | FileCheck %s --check-prefix=SLOW
-; RUN: opt -p 'require<profile-summary>,function(codegenprepare)' -S --mattr=+zvbb %s \
+; RUN: opt -p 'require<profile-summary>,function(codegenprepare)' -S --mattr=+v,+zvbb %s \
; RUN: | FileCheck %s --check-prefix=FAST
; REQUIRES: riscv-registered-target
More information about the llvm-commits mailing list