[llvm] [GISel][RISCV] Compute CTPOP of small odd-sized integer correctly (PR #168559)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 08:05:24 PST 2025
https://github.com/XChy created https://github.com/llvm/llvm-project/pull/168559
Fixes the assertion failure reported in #168523.
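This patch zero-extends small, odd-sized integers (narrower than 8 bits) to 8 bits before lowering G_CTPOP and truncates the result back to the original type, ensuring that the subsequent lowering code behaves correctly.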
>From f9a30e695a74dd7be4b3f167d3f82d8569c3a98d Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 18 Nov 2025 23:57:00 +0800
Subject: [PATCH] [RISCV][GISel] Compute CTPOP of small odd-size integer
correctly
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 12 ++++
.../test/CodeGen/RISCV/GlobalISel/bitmanip.ll | 56 ++++++++++++++++++
.../legalizer/legalize-ctpop-rv64.mir | 57 +++++++++++++++++++
3 files changed, 125 insertions(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d02f097fef829..e18fe67a82a17 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7678,6 +7678,18 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;
+ // Lift small odd-size integer to 8-bit integer.
+ if (Size < 8) {
+ LLT NewTy = LLT::scalar(8);
+ auto ZExt = B.buildZExt(NewTy, SrcReg);
+ auto NewCTPOP = B.buildCTPOP(NewTy, ZExt);
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+ MI.getOperand(1).setReg(NewCTPOP.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
// Count set bits in blocks of 2 bits. Default approach would be
// B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
// We use following formula instead:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
index 68bc1e5db6095..52d96dd265899 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -205,3 +205,59 @@ define i24 @bitreverse_i24(i24 %x) {
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
ret i24 %rev
}
+
+define i2 @test_ctpop_i2(i2 %a) {
+; RV32-LABEL: test_ctpop_i2:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: andi a0, a0, 3
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: zext.b a1, a0
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: andi a1, a1, 51
+; RV32-NEXT: andi a0, a0, 51
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: andi a0, a0, 15
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: call __mulsi3
+; RV32-NEXT: zext.b a0, a0
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_ctpop_i2:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: andi a0, a0, 3
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: zext.b a1, a0
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: andi a1, a1, 51
+; RV64-NEXT: andi a0, a0, 51
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: andi a0, a0, 15
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: call __muldi3
+; RV64-NEXT: zext.b a0, a0
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+ %1 = call i2 @llvm.ctpop.i2(i2 %a)
+ ret i2 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
index c61c46df0a434..720417211385e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -216,3 +216,60 @@ body: |
PseudoRET implicit $x10
...
+
+...
+---
+name: ctpop_i2
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; RV64I-LABEL: name: ctpop_i2
+ ; RV64I: liveins: $x10
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
+ ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
+ ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
+ ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
+ ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+ ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
+ ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+ ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
+ ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
+ ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
+ ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
+ ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
+ ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
+ ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
+ ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+ ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+ ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
+ ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
+ ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
+ ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
+ ; RV64I-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64ZBB-LABEL: name: ctpop_i2
+ ; RV64ZBB: liveins: $x10
+ ; RV64ZBB-NEXT: {{ $}}
+ ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+ ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
+ ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
+ ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
+ ; RV64ZBB-NEXT: PseudoRET implicit $x10
+ %1:_(s64) = COPY $x10
+ %0:_(s2) = G_TRUNC %1(s64)
+ %2:_(s2) = G_CTPOP %0(s2)
+ %3:_(s64) = G_ANYEXT %2(s2)
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+
+...
More information about the llvm-commits
mailing list