[llvm] [RISCV] Improve fixed vector handling in isCtpopFast. (PR #158380)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 12 15:55:18 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
Previously we considered fixed vectors fast if either Zvbb or Zbb was
enabled. Zbb only helps if the vector type will end up being
scalarized.
---
Full diff: https://github.com/llvm/llvm-project/pull/158380.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+12-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll (+4-4)
- (modified) llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll (+12-8)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f9b484b98739f..b3c1082184162 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- return true;
+ return Subtarget.hasVInstructions();
case MVT::i64:
return Subtarget.hasVInstructionsI64();
case MVT::f16:
@@ -24840,12 +24840,17 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
}
bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
- if (VT.isScalableVector())
- return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
- if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
- return true;
- return Subtarget.hasCPOPLike() &&
- (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+ if (VT.isVector()) {
+ EVT SVT = VT.getVectorElementType();
+ // If the element type is legal we can use cpop.v if it is enabled.
+ if (isLegalElementTypeForRVV(SVT))
+ return Subtarget.hasStdExtZvbb();
+ // If it will be scalarized, we might be able to use cpop.
+ return VT.isFixedLengthVector() && Subtarget.hasCPOPLike() &&
+ (SVT == MVT::i32 || SVT == MVT::i64);
+ }
+
+ return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
}
unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 44b9331fd2caf..474708383b4c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i8:
diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..ed795c223ffaa 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -18,8 +18,9 @@ define <4 x i1> @test_ult_2(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_ult_2(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp ult <4 x i64> [[CTPOP]], splat (i64 2)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -37,8 +38,9 @@ define <4 x i1> @test_ugt_1(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_ugt_1(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp ugt <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -56,8 +58,9 @@ define <4 x i1> @test_eq_1(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_eq_1(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp eq <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp ugt <4 x i64> [[TMP2]], [[TMP1]]
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -75,8 +78,9 @@ define <4 x i1> @test_ne_1(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_ne_1(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp ne <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp ule <4 x i64> [[TMP2]], [[TMP1]]
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
``````````
</details>
https://github.com/llvm/llvm-project/pull/158380
More information about the llvm-commits
mailing list