[llvm] 4be1099 - [RISCV] Improve fixed vector handling in isCtpopFast. (#158380)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 16 09:47:13 PDT 2025


Author: Craig Topper
Date: 2025-09-16T09:47:09-07:00
New Revision: 4be1099607c97b9f28cd30d56149e7c6428c216c

URL: https://github.com/llvm/llvm-project/commit/4be1099607c97b9f28cd30d56149e7c6428c216c
DIFF: https://github.com/llvm/llvm-project/commit/4be1099607c97b9f28cd30d56149e7c6428c216c.diff

LOG: [RISCV] Improve fixed vector handling in isCtpopFast. (#158380)

Previously we considered fixed vectors fast if Zvbb or Zbb is
enabled. Zbb only helps if the vector type will end up being
scalarized.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rv32zbb.ll
    llvm/test/CodeGen/RISCV/rv64zbb.ll
    llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9d90eb0a65218..aa3a8697c0d55 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
-    return true;
+    return Subtarget.hasVInstructions();
   case MVT::i64:
     return Subtarget.hasVInstructionsI64();
   case MVT::f16:
@@ -24820,12 +24820,16 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
 }
 
 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
-  if (VT.isScalableVector())
-    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
-  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
-    return true;
-  return Subtarget.hasCPOPLike() &&
-         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+  if (VT.isVector()) {
+    EVT SVT = VT.getVectorElementType();
+    // If the element type is legal we can use cpop.v if it is enabled.
+    if (isLegalElementTypeForRVV(SVT))
+      return Subtarget.hasStdExtZvbb();
+    // Don't consider it fast if the type needs to be legalized or scalarized.
+    return false;
+  }
+
+  return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
 }
 
 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 3b3ef72e32aa7..a1a843a7c1ba7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -423,100 +423,62 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
 }
 
 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ult_two:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a2, a0, -1
-; RV32I-NEXT:    addi a3, a1, -1
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    seqz a0, a0
-; RV32I-NEXT:    seqz a1, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ult_two:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    sltiu a0, a0, 2
-; RV32ZBB-NEXT:    sltiu a1, a1, 2
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_ult_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    and a1, a1, a3
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ugt_one:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a2, a0, -1
-; RV32I-NEXT:    addi a3, a1, -1
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    snez a0, a0
-; RV32I-NEXT:    snez a1, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ugt_one:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    sltiu a0, a0, 2
-; RV32ZBB-NEXT:    sltiu a1, a1, 2
-; RV32ZBB-NEXT:    xori a0, a0, 1
-; RV32ZBB-NEXT:    xori a1, a1, 1
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_ugt_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    and a1, a1, a3
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_eq_one:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a2, a0, -1
-; RV32I-NEXT:    addi a3, a1, -1
-; RV32I-NEXT:    xor a1, a1, a3
-; RV32I-NEXT:    xor a0, a0, a2
-; RV32I-NEXT:    sltu a0, a2, a0
-; RV32I-NEXT:    sltu a1, a3, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    addi a0, a0, -1
-; RV32ZBB-NEXT:    addi a1, a1, -1
-; RV32ZBB-NEXT:    seqz a0, a0
-; RV32ZBB-NEXT:    seqz a1, a1
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_eq_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    xor a1, a1, a3
+; CHECK-NEXT:    xor a0, a0, a2
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    sltu a1, a3, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ne_one:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a2, a0, -1
-; RV32I-NEXT:    addi a3, a1, -1
-; RV32I-NEXT:    xor a1, a1, a3
-; RV32I-NEXT:    xor a0, a0, a2
-; RV32I-NEXT:    sltu a0, a2, a0
-; RV32I-NEXT:    sltu a1, a3, a1
-; RV32I-NEXT:    xori a0, a0, 1
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    addi a0, a0, -1
-; RV32ZBB-NEXT:    addi a1, a1, -1
-; RV32ZBB-NEXT:    snez a0, a0
-; RV32ZBB-NEXT:    snez a1, a1
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_ne_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    xor a1, a1, a3
+; CHECK-NEXT:    xor a0, a0, a2
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    sltu a1, a3, a1
+; CHECK-NEXT:    xori a0, a0, 1
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
   ret <2 x i1> %2
@@ -792,200 +754,130 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
 }
 
 define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ult_two:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a1, 0(a0)
-; RV32I-NEXT:    lw a2, 8(a0)
-; RV32I-NEXT:    lw a3, 4(a0)
-; RV32I-NEXT:    lw a0, 12(a0)
-; RV32I-NEXT:    seqz a4, a1
-; RV32I-NEXT:    seqz a5, a2
-; RV32I-NEXT:    addi a6, a1, -1
-; RV32I-NEXT:    addi a7, a2, -1
-; RV32I-NEXT:    sub a4, a3, a4
-; RV32I-NEXT:    sub a5, a0, a5
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    and a0, a0, a5
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a2, a2, a0
-; RV32I-NEXT:    seqz a0, a1
-; RV32I-NEXT:    seqz a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ult_two:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a1, 12(a0)
-; RV32ZBB-NEXT:    lw a2, 8(a0)
-; RV32ZBB-NEXT:    lw a3, 4(a0)
-; RV32ZBB-NEXT:    lw a0, 0(a0)
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a2, a2
-; RV32ZBB-NEXT:    cpop a3, a3
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    add a1, a2, a1
-; RV32ZBB-NEXT:    add a0, a0, a3
-; RV32ZBB-NEXT:    sltiu a0, a0, 2
-; RV32ZBB-NEXT:    sltiu a1, a1, 2
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_ult_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a0)
+; CHECK-NEXT:    lw a2, 8(a0)
+; CHECK-NEXT:    lw a3, 4(a0)
+; CHECK-NEXT:    lw a0, 12(a0)
+; CHECK-NEXT:    seqz a4, a1
+; CHECK-NEXT:    seqz a5, a2
+; CHECK-NEXT:    addi a6, a1, -1
+; CHECK-NEXT:    addi a7, a2, -1
+; CHECK-NEXT:    sub a4, a3, a4
+; CHECK-NEXT:    sub a5, a0, a5
+; CHECK-NEXT:    and a2, a2, a7
+; CHECK-NEXT:    and a1, a1, a6
+; CHECK-NEXT:    and a0, a0, a5
+; CHECK-NEXT:    and a3, a3, a4
+; CHECK-NEXT:    or a1, a1, a3
+; CHECK-NEXT:    or a2, a2, a0
+; CHECK-NEXT:    seqz a0, a1
+; CHECK-NEXT:    seqz a1, a2
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ugt_one:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a1, 0(a0)
-; RV32I-NEXT:    lw a2, 8(a0)
-; RV32I-NEXT:    lw a3, 4(a0)
-; RV32I-NEXT:    lw a0, 12(a0)
-; RV32I-NEXT:    seqz a4, a1
-; RV32I-NEXT:    seqz a5, a2
-; RV32I-NEXT:    addi a6, a1, -1
-; RV32I-NEXT:    addi a7, a2, -1
-; RV32I-NEXT:    sub a4, a3, a4
-; RV32I-NEXT:    sub a5, a0, a5
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    and a0, a0, a5
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a2, a2, a0
-; RV32I-NEXT:    snez a0, a1
-; RV32I-NEXT:    snez a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ugt_one:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a1, 12(a0)
-; RV32ZBB-NEXT:    lw a2, 8(a0)
-; RV32ZBB-NEXT:    lw a3, 4(a0)
-; RV32ZBB-NEXT:    lw a0, 0(a0)
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a2, a2
-; RV32ZBB-NEXT:    cpop a3, a3
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    add a1, a2, a1
-; RV32ZBB-NEXT:    add a0, a0, a3
-; RV32ZBB-NEXT:    sltiu a0, a0, 2
-; RV32ZBB-NEXT:    sltiu a1, a1, 2
-; RV32ZBB-NEXT:    xori a0, a0, 1
-; RV32ZBB-NEXT:    xori a1, a1, 1
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_ugt_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a0)
+; CHECK-NEXT:    lw a2, 8(a0)
+; CHECK-NEXT:    lw a3, 4(a0)
+; CHECK-NEXT:    lw a0, 12(a0)
+; CHECK-NEXT:    seqz a4, a1
+; CHECK-NEXT:    seqz a5, a2
+; CHECK-NEXT:    addi a6, a1, -1
+; CHECK-NEXT:    addi a7, a2, -1
+; CHECK-NEXT:    sub a4, a3, a4
+; CHECK-NEXT:    sub a5, a0, a5
+; CHECK-NEXT:    and a2, a2, a7
+; CHECK-NEXT:    and a1, a1, a6
+; CHECK-NEXT:    and a0, a0, a5
+; CHECK-NEXT:    and a3, a3, a4
+; CHECK-NEXT:    or a1, a1, a3
+; CHECK-NEXT:    or a2, a2, a0
+; CHECK-NEXT:    snez a0, a1
+; CHECK-NEXT:    snez a1, a2
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_eq_one:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    lw a0, 0(a0)
-; RV32I-NEXT:    lw a3, 4(a1)
-; RV32I-NEXT:    lw a2, 12(a1)
-; RV32I-NEXT:    beqz a3, .LBB22_3
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    seqz a0, a0
-; RV32I-NEXT:    sub a0, a3, a0
-; RV32I-NEXT:    xor a3, a3, a0
-; RV32I-NEXT:    sltu a0, a0, a3
-; RV32I-NEXT:    lw a1, 8(a1)
-; RV32I-NEXT:    bnez a2, .LBB22_4
-; RV32I-NEXT:  .LBB22_2:
-; RV32I-NEXT:    addi a2, a1, -1
-; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    sltu a1, a2, a1
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB22_3:
-; RV32I-NEXT:    addi a3, a0, -1
-; RV32I-NEXT:    xor a0, a0, a3
-; RV32I-NEXT:    sltu a0, a3, a0
-; RV32I-NEXT:    lw a1, 8(a1)
-; RV32I-NEXT:    beqz a2, .LBB22_2
-; RV32I-NEXT:  .LBB22_4:
-; RV32I-NEXT:    seqz a1, a1
-; RV32I-NEXT:    sub a1, a2, a1
-; RV32I-NEXT:    xor a2, a2, a1
-; RV32I-NEXT:    sltu a1, a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a1, 12(a0)
-; RV32ZBB-NEXT:    lw a2, 8(a0)
-; RV32ZBB-NEXT:    lw a3, 4(a0)
-; RV32ZBB-NEXT:    lw a0, 0(a0)
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a2, a2
-; RV32ZBB-NEXT:    cpop a3, a3
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    add a1, a2, a1
-; RV32ZBB-NEXT:    add a0, a0, a3
-; RV32ZBB-NEXT:    addi a0, a0, -1
-; RV32ZBB-NEXT:    addi a1, a1, -1
-; RV32ZBB-NEXT:    seqz a0, a0
-; RV32ZBB-NEXT:    seqz a1, a1
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_eq_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    lw a3, 4(a1)
+; CHECK-NEXT:    lw a2, 12(a1)
+; CHECK-NEXT:    beqz a3, .LBB22_3
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    sub a0, a3, a0
+; CHECK-NEXT:    xor a3, a3, a0
+; CHECK-NEXT:    sltu a0, a0, a3
+; CHECK-NEXT:    lw a1, 8(a1)
+; CHECK-NEXT:    bnez a2, .LBB22_4
+; CHECK-NEXT:  .LBB22_2:
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    xor a1, a1, a2
+; CHECK-NEXT:    sltu a1, a2, a1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB22_3:
+; CHECK-NEXT:    addi a3, a0, -1
+; CHECK-NEXT:    xor a0, a0, a3
+; CHECK-NEXT:    sltu a0, a3, a0
+; CHECK-NEXT:    lw a1, 8(a1)
+; CHECK-NEXT:    beqz a2, .LBB22_2
+; CHECK-NEXT:  .LBB22_4:
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    sub a1, a2, a1
+; CHECK-NEXT:    xor a2, a2, a1
+; CHECK-NEXT:    sltu a1, a1, a2
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ne_one:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a2, 0(a0)
-; RV32I-NEXT:    lw a3, 4(a0)
-; RV32I-NEXT:    lw a1, 12(a0)
-; RV32I-NEXT:    beqz a3, .LBB23_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    seqz a2, a2
-; RV32I-NEXT:    sub a2, a3, a2
-; RV32I-NEXT:    xor a3, a3, a2
-; RV32I-NEXT:    sltu a2, a2, a3
-; RV32I-NEXT:    j .LBB23_3
-; RV32I-NEXT:  .LBB23_2:
-; RV32I-NEXT:    addi a3, a2, -1
-; RV32I-NEXT:    xor a2, a2, a3
-; RV32I-NEXT:    sltu a2, a3, a2
-; RV32I-NEXT:  .LBB23_3:
-; RV32I-NEXT:    lw a3, 8(a0)
-; RV32I-NEXT:    xori a0, a2, 1
-; RV32I-NEXT:    beqz a1, .LBB23_5
-; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    seqz a2, a3
-; RV32I-NEXT:    sub a2, a1, a2
-; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    sltu a1, a2, a1
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB23_5:
-; RV32I-NEXT:    addi a1, a3, -1
-; RV32I-NEXT:    xor a3, a3, a1
-; RV32I-NEXT:    sltu a1, a1, a3
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    lw a1, 12(a0)
-; RV32ZBB-NEXT:    lw a2, 8(a0)
-; RV32ZBB-NEXT:    lw a3, 4(a0)
-; RV32ZBB-NEXT:    lw a0, 0(a0)
-; RV32ZBB-NEXT:    cpop a1, a1
-; RV32ZBB-NEXT:    cpop a2, a2
-; RV32ZBB-NEXT:    cpop a3, a3
-; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    add a1, a2, a1
-; RV32ZBB-NEXT:    add a0, a0, a3
-; RV32ZBB-NEXT:    addi a0, a0, -1
-; RV32ZBB-NEXT:    addi a1, a1, -1
-; RV32ZBB-NEXT:    snez a0, a0
-; RV32ZBB-NEXT:    snez a1, a1
-; RV32ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_ne_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a2, 0(a0)
+; CHECK-NEXT:    lw a3, 4(a0)
+; CHECK-NEXT:    lw a1, 12(a0)
+; CHECK-NEXT:    beqz a3, .LBB23_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    seqz a2, a2
+; CHECK-NEXT:    sub a2, a3, a2
+; CHECK-NEXT:    xor a3, a3, a2
+; CHECK-NEXT:    sltu a2, a2, a3
+; CHECK-NEXT:    j .LBB23_3
+; CHECK-NEXT:  .LBB23_2:
+; CHECK-NEXT:    addi a3, a2, -1
+; CHECK-NEXT:    xor a2, a2, a3
+; CHECK-NEXT:    sltu a2, a3, a2
+; CHECK-NEXT:  .LBB23_3:
+; CHECK-NEXT:    lw a3, 8(a0)
+; CHECK-NEXT:    xori a0, a2, 1
+; CHECK-NEXT:    beqz a1, .LBB23_5
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    seqz a2, a3
+; CHECK-NEXT:    sub a2, a1, a2
+; CHECK-NEXT:    xor a1, a1, a2
+; CHECK-NEXT:    sltu a1, a2, a1
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB23_5:
+; CHECK-NEXT:    addi a1, a3, -1
+; CHECK-NEXT:    xor a3, a3, a1
+; CHECK-NEXT:    sltu a1, a1, a3
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
   ret <2 x i1> %2

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index d133f9d1db389..d8b7bfcbceb27 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -762,108 +762,70 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
 }
 
 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ult_two:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a2, a0, -1
-; RV64I-NEXT:    addi a3, a1, -1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    sext.w a1, a1
-; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    seqz a0, a0
-; RV64I-NEXT:    seqz a1, a1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ult_two:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpopw a1, a1
-; RV64ZBB-NEXT:    cpopw a0, a0
-; RV64ZBB-NEXT:    sltiu a0, a0, 2
-; RV64ZBB-NEXT:    sltiu a1, a1, 2
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_ult_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    and a1, a1, a3
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    sext.w a1, a1
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ugt_one:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a2, a0, -1
-; RV64I-NEXT:    addi a3, a1, -1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    sext.w a1, a1
-; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    snez a1, a1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ugt_one:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpopw a1, a1
-; RV64ZBB-NEXT:    cpopw a0, a0
-; RV64ZBB-NEXT:    sltiu a0, a0, 2
-; RV64ZBB-NEXT:    sltiu a1, a1, 2
-; RV64ZBB-NEXT:    xori a0, a0, 1
-; RV64ZBB-NEXT:    xori a1, a1, 1
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_ugt_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    and a1, a1, a3
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    sext.w a1, a1
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_eq_one:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a2, a0, -1
-; RV64I-NEXT:    addiw a3, a1, -1
-; RV64I-NEXT:    xor a1, a1, a3
-; RV64I-NEXT:    xor a0, a0, a2
-; RV64I-NEXT:    sext.w a1, a1
-; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    sltu a0, a2, a0
-; RV64I-NEXT:    sltu a1, a3, a1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_eq_one:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpopw a1, a1
-; RV64ZBB-NEXT:    cpopw a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    addi a1, a1, -1
-; RV64ZBB-NEXT:    seqz a0, a0
-; RV64ZBB-NEXT:    seqz a1, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_eq_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addiw a2, a0, -1
+; CHECK-NEXT:    addiw a3, a1, -1
+; CHECK-NEXT:    xor a1, a1, a3
+; CHECK-NEXT:    xor a0, a0, a2
+; CHECK-NEXT:    sext.w a1, a1
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    sltu a1, a3, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ne_one:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a2, a0, -1
-; RV64I-NEXT:    addiw a3, a1, -1
-; RV64I-NEXT:    xor a1, a1, a3
-; RV64I-NEXT:    xor a0, a0, a2
-; RV64I-NEXT:    sext.w a1, a1
-; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    sltu a0, a2, a0
-; RV64I-NEXT:    sltu a1, a3, a1
-; RV64I-NEXT:    xori a0, a0, 1
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ne_one:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpopw a1, a1
-; RV64ZBB-NEXT:    cpopw a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    addi a1, a1, -1
-; RV64ZBB-NEXT:    snez a0, a0
-; RV64ZBB-NEXT:    snez a1, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i32_ne_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addiw a2, a0, -1
+; CHECK-NEXT:    addiw a3, a1, -1
+; CHECK-NEXT:    xor a1, a1, a3
+; CHECK-NEXT:    xor a0, a0, a2
+; CHECK-NEXT:    sext.w a1, a1
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    sltu a1, a3, a1
+; CHECK-NEXT:    xori a0, a0, 1
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    ret
   %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   %2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
   ret <2 x i1> %2
@@ -1052,100 +1014,62 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
 }
 
 define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ult_two:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a2, a0, -1
-; RV64I-NEXT:    addi a3, a1, -1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    seqz a0, a0
-; RV64I-NEXT:    seqz a1, a1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ult_two:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpop a1, a1
-; RV64ZBB-NEXT:    cpop a0, a0
-; RV64ZBB-NEXT:    sltiu a0, a0, 2
-; RV64ZBB-NEXT:    sltiu a1, a1, 2
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_ult_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    and a1, a1, a3
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ugt_one:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a2, a0, -1
-; RV64I-NEXT:    addi a3, a1, -1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    snez a1, a1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ugt_one:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpop a1, a1
-; RV64ZBB-NEXT:    cpop a0, a0
-; RV64ZBB-NEXT:    sltiu a0, a0, 2
-; RV64ZBB-NEXT:    sltiu a1, a1, 2
-; RV64ZBB-NEXT:    xori a0, a0, 1
-; RV64ZBB-NEXT:    xori a1, a1, 1
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_ugt_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    and a1, a1, a3
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_eq_one:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a2, a0, -1
-; RV64I-NEXT:    addi a3, a1, -1
-; RV64I-NEXT:    xor a1, a1, a3
-; RV64I-NEXT:    xor a0, a0, a2
-; RV64I-NEXT:    sltu a0, a2, a0
-; RV64I-NEXT:    sltu a1, a3, a1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_eq_one:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpop a1, a1
-; RV64ZBB-NEXT:    cpop a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    addi a1, a1, -1
-; RV64ZBB-NEXT:    seqz a0, a0
-; RV64ZBB-NEXT:    seqz a1, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_eq_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    xor a1, a1, a3
+; CHECK-NEXT:    xor a0, a0, a2
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    sltu a1, a3, a1
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
   ret <2 x i1> %2
 }
 
 define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ne_one:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a2, a0, -1
-; RV64I-NEXT:    addi a3, a1, -1
-; RV64I-NEXT:    xor a1, a1, a3
-; RV64I-NEXT:    xor a0, a0, a2
-; RV64I-NEXT:    sltu a0, a2, a0
-; RV64I-NEXT:    sltu a1, a3, a1
-; RV64I-NEXT:    xori a0, a0, 1
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ne_one:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    cpop a1, a1
-; RV64ZBB-NEXT:    cpop a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    addi a1, a1, -1
-; RV64ZBB-NEXT:    snez a0, a0
-; RV64ZBB-NEXT:    snez a1, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-LABEL: ctpop_v2i64_ne_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    xor a1, a1, a3
+; CHECK-NEXT:    xor a0, a0, a2
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    sltu a1, a3, a1
+; CHECK-NEXT:    xori a0, a0, 1
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    ret
   %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   %2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
   ret <2 x i1> %2

diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..9c5df5f70fc15 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -p 'require<profile-summary>,function(codegenprepare)' -S %s \
 ; RUN:   | FileCheck %s --check-prefix=SLOW
-; RUN: opt -p 'require<profile-summary>,function(codegenprepare)' -S --mattr=+zvbb %s \
+; RUN: opt -p 'require<profile-summary>,function(codegenprepare)' -S --mattr=+v,+zvbb %s \
 ; RUN:   | FileCheck %s --check-prefix=FAST
 ; REQUIRES: riscv-registered-target
 


        


More information about the llvm-commits mailing list