[llvm] af1a5c5 - [GISel][RISCV] Simplify the generated code for narrowScalarCTLS. (#180827)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 12:01:34 PST 2026
Author: Craig Topper
Date: 2026-02-13T12:01:29-08:00
New Revision: af1a5c5c24fa7d0f6fd4445242a0d8f2ce7dd2ba
URL: https://github.com/llvm/llvm-project/commit/af1a5c5c24fa7d0f6fd4445242a0d8f2ce7dd2ba
DIFF: https://github.com/llvm/llvm-project/commit/af1a5c5c24fa7d0f6fd4445242a0d8f2ce7dd2ba.diff
LOG: [GISel][RISCV] Simplify the generated code for narrowScalarCTLS. (#180827)
Instead of trying to make CTLS work for the Lo part, conditionally
invert Lo using the Hi sign bit, then do a CTLZ. If the CTLZ is
zero, then the Lo sign bit differs from the Hi sign bit. Otherwise,
each leading zero represents an additional sign bit.
This generates better code when CTLS and CTLZ are both supported.
I've added Zbb to the P extension command line for RISC-V since
P is likely to imply Zbb when it is ratified, but that isn't written
in the spec yet. If that doesn't happen, I expect CLZ would get
added back to the P extension.
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e6eec3194b716..f6c57ed5537a3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7582,26 +7582,22 @@ LegalizerHelper::narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx,
auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
- auto LoSign = B.buildAShr(NarrowTy, Lo, ShAmt);
- auto LoSameSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
- LoSign.getReg(0), Sign.getReg(0));
+ auto HiIsSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign);
- auto HiIsSign =
- B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign.getReg(0));
+ // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
+  // leading zeros, then the MSB of Lo is different than the MSB of Hi.
+ // Otherwise the leading zeros represent additional sign bits of the original
+ // value.
+ auto LoInv = B.buildXor(DstTy, Lo, Sign);
+ auto LoCTLZ = B.buildCTLZ(DstTy, LoInv);
- auto LoCTLS = B.buildCTLS(DstTy, Lo);
- auto GNarrowSize = B.buildConstant(DstTy, NarrowSize);
- auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLS, GNarrowSize);
-
- // If the low half flips sign, the run of redundant bits stops at the
- // boundary, so use (NarrowSize - 1) instead of extending into Lo.
- auto GNarrowSizeMinus1 = B.buildConstant(DstTy, NarrowSize - 1);
- auto HiSignResult =
- B.buildSelect(DstTy, LoSameSign, HiIsSignCTLS, GNarrowSizeMinus1);
+ // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
+ auto C_NarrowSizeM1 = B.buildConstant(DstTy, NarrowSize - 1);
+ auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
auto HiCTLS = B.buildCTLS(DstTy, Hi);
- B.buildSelect(DstReg, HiIsSign, HiSignResult, HiCTLS);
+ B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir
index 924415f85b099..6f85a0a3c5a85 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \
# RUN: | FileCheck %s --check-prefix=RV32I
-# RUN: llc -mtriple=riscv32 -mattr=+experimental-p -run-pass=legalizer %s -o - \
+# RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+zbb -run-pass=legalizer %s -o - \
# RUN: | FileCheck %s --check-prefix=RV32P
# RUN: llc -mtriple=riscv32 -mattr=+zbb -run-pass=legalizer %s -o - \
# RUN: | FileCheck %s --check-prefix=RV32ZBB
@@ -69,10 +69,9 @@ body: |
; RV32P: liveins: $x10
; RV32P-NEXT: {{ $}}
; RV32P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32P-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
+ ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[SEXT_INREG]](s32)
; RV32P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; RV32P-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; RV32P-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[ASHR]](s32)
; RV32P-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLS]], [[C]]
; RV32P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
; RV32P-NEXT: $x10 = COPY [[COPY1]](s32)
@@ -170,10 +169,9 @@ body: |
; RV32P: liveins: $x10
; RV32P-NEXT: {{ $}}
; RV32P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32P-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
+ ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[SEXT_INREG]](s32)
; RV32P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; RV32P-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; RV32P-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[ASHR]](s32)
; RV32P-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLS]], [[C]]
; RV32P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
; RV32P-NEXT: $x10 = COPY [[COPY1]](s32)
@@ -297,11 +295,9 @@ body: |
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; RV32I-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
- ; RV32I-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
- ; RV32I-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR1]](s32), [[ASHR]]
- ; RV32I-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]]
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]]
+ ; RV32I-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]]
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32I-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR1]]
; RV32I-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[XOR]], [[C1]](s32)
; RV32I-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[LSHR]]
; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
@@ -337,9 +333,7 @@ body: |
; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C9]](s32)
; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR8]]
- ; RV32I-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]]
- ; RV32I-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[C10]]
- ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD4]], [[C]]
+ ; RV32I-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SUB1]], [[C]]
; RV32I-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[ASHR]]
; RV32I-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[XOR1]], [[C1]](s32)
; RV32I-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[XOR1]], [[LSHR9]]
@@ -353,10 +347,10 @@ body: |
; RV32I-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[LSHR13]]
; RV32I-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[OR9]], [[C1]](s32)
; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C6]]
- ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[OR9]], [[AND4]]
- ; RV32I-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C2]](s32)
+ ; RV32I-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[OR9]], [[AND4]]
+ ; RV32I-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C2]](s32)
; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C7]]
- ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C7]]
+ ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C7]]
; RV32I-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[AND6]]
; RV32I-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[ADD5]], [[C3]](s32)
; RV32I-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR16]], [[ADD5]]
@@ -366,11 +360,11 @@ body: |
; RV32I-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ADD7]], [[C5]](s32)
; RV32I-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[SHL3]]
; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[ADD8]], [[C9]](s32)
- ; RV32I-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR17]]
- ; RV32I-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[C1]]
- ; RV32I-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SUB5]]
+ ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR17]]
+ ; RV32I-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[C1]]
+ ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD4]], [[SUB4]]
; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32I-NEXT: $x10 = COPY [[SELECT1]](s32)
+ ; RV32I-NEXT: $x10 = COPY [[SELECT]](s32)
; RV32I-NEXT: $x11 = COPY [[C11]](s32)
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
;
@@ -381,18 +375,15 @@ body: |
; RV32P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; RV32P-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
- ; RV32P-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
- ; RV32P-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR1]](s32), [[ASHR]]
- ; RV32P-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]]
- ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[COPY]](s32)
- ; RV32P-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; RV32P-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLS]], [[C1]]
- ; RV32P-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[C]]
- ; RV32P-NEXT: [[CTLS1:%[0-9]+]]:_(s32) = G_CTLS [[COPY1]](s32)
- ; RV32P-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[CTLS1]]
- ; RV32P-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32P-NEXT: $x10 = COPY [[SELECT1]](s32)
- ; RV32P-NEXT: $x11 = COPY [[C2]](s32)
+ ; RV32P-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]]
+ ; RV32P-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]]
+ ; RV32P-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[XOR]](s32)
+ ; RV32P-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C]]
+ ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[COPY1]](s32)
+ ; RV32P-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[CTLS]]
+ ; RV32P-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32P-NEXT: $x10 = COPY [[SELECT]](s32)
+ ; RV32P-NEXT: $x11 = COPY [[C1]](s32)
; RV32P-NEXT: PseudoRET implicit $x10, implicit $x11
;
; RV32ZBB-LABEL: name: cls_i64
@@ -402,23 +393,18 @@ body: |
; RV32ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; RV32ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
- ; RV32ZBB-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
- ; RV32ZBB-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR1]](s32), [[ASHR]]
- ; RV32ZBB-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]]
- ; RV32ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32ZBB-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR1]]
+ ; RV32ZBB-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]]
+ ; RV32ZBB-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]]
; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[XOR]](s32)
- ; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]]
- ; RV32ZBB-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; RV32ZBB-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]]
- ; RV32ZBB-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[C]]
+ ; RV32ZBB-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C]]
+ ; RV32ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32ZBB-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[ASHR]]
; RV32ZBB-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[XOR1]](s32)
- ; RV32ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C1]]
- ; RV32ZBB-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SUB1]]
- ; RV32ZBB-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32ZBB-NEXT: $x10 = COPY [[SELECT1]](s32)
- ; RV32ZBB-NEXT: $x11 = COPY [[C3]](s32)
+ ; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C1]]
+ ; RV32ZBB-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
+ ; RV32ZBB-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32ZBB-NEXT: $x10 = COPY [[SELECT]](s32)
+ ; RV32ZBB-NEXT: $x11 = COPY [[C2]](s32)
; RV32ZBB-NEXT: PseudoRET implicit $x10, implicit $x11
%1:_(s32) = COPY $x10
%2:_(s32) = COPY $x11
More information about the llvm-commits
mailing list