[llvm] [RISCV] Improve fixed vector handling in isCtpopFast. (PR #158380)

Fri Sep 12 15:55:18 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

Previously we considered fixed vectors fast if Zvbb or Zbb was
enabled. Zbb only helps if the vector type will end up being
scalarized.

---
Full diff: https://github.com/llvm/llvm-project/pull/158380.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+12-7) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll (+4-4) 
- (modified) llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll (+12-8) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f9b484b98739f..b3c1082184162 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
-    return true;
+    return Subtarget.hasVInstructions();
   case MVT::i64:
     return Subtarget.hasVInstructionsI64();
   case MVT::f16:
@@ -24840,12 +24840,17 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
 }
 
 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
-  if (VT.isScalableVector())
-    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
-  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
-    return true;
-  return Subtarget.hasCPOPLike() &&
-         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+  if (VT.isVector()) {
+    EVT SVT = VT.getVectorElementType();
+    // If the element type is legal we can use cpop.v if it is enabled.
+    if (isLegalElementTypeForRVV(SVT))
+      return Subtarget.hasStdExtZvbb();
+    // If it will be scalarized, we might be able to use cpop.
+    return VT.isFixedLengthVector() && Subtarget.hasCPOPLike() &&
+           (SVT == MVT::i32 || SVT == MVT::i64);
+  }
+
+  return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
 }
 
 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 44b9331fd2caf..474708383b4c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 
 define void @ctpop_v16i8(ptr %x, ptr %y) {
 ; CHECK-LABEL: ctpop_v16i8:
diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..ed795c223ffaa 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -18,8 +18,9 @@ define <4 x i1> @test_ult_2(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_ult_2(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp ult <4 x i64> [[CTPOP]], splat (i64 2)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -37,8 +38,9 @@ define <4 x i1> @test_ugt_1(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_ugt_1(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp ugt <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -56,8 +58,9 @@ define <4 x i1> @test_eq_1(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_eq_1(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp eq <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp ugt <4 x i64> [[TMP2]], [[TMP1]]
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -75,8 +78,9 @@ define <4 x i1> @test_ne_1(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_ne_1(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp ne <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp ule <4 x i64> [[TMP2]], [[TMP1]]
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)

``````````

</details>


https://github.com/llvm/llvm-project/pull/158380