[llvm-branch-commits] [llvm] 8b9e72f - Revert "[AMDGPU] Replace AMDGPUISD::FFBH_I32 with ISD::CTLS (#178420)"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 29 21:17:28 PST 2026
Author: Kewen Meng
Date: 2026-01-29T21:17:25-08:00
New Revision: 8b9e72f78bafc9d8c4d3f30dbd4c77128b8e4a2a
URL: https://github.com/llvm/llvm-project/commit/8b9e72f78bafc9d8c4d3f30dbd4c77128b8e4a2a
DIFF: https://github.com/llvm/llvm-project/commit/8b9e72f78bafc9d8c4d3f30dbd4c77128b8e4a2a.diff
LOG: Revert "[AMDGPU] Replace AMDGPUISD::FFBH_I32 with ISD::CTLS (#178420)"
This reverts commit 65925b0405009177a4ede0b79bcfebfb48329b1e.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5035cc6926546..0c9ae45f648e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3346,7 +3346,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
OppositeSign);
// Count the leading sign bits.
- ShAmt = DAG.getNode(ISD::CTLS, SL, MVT::i32, Hi);
+ ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
// Different from unsigned conversion, the shift should be one bit less to
// preserve the sign bit.
ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index eef084214e237..8a43c2da38346 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -320,6 +320,7 @@ def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
// ctlz with -1 if input is zero.
def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
+def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
// cttz with -1 if input is zero.
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
@@ -486,7 +487,7 @@ def AMDGPUdiv_fixup : PatFrags<(ops node:$src0, node:$src1, node:$src2),
def AMDGPUffbh_i32 : PatFrags<(ops node:$src),
[(int_amdgcn_sffbh node:$src),
- (ctls node:$src)]>;
+ (AMDGPUffbh_i32_impl node:$src)]>;
def AMDGPUffbh_u32 : PatFrags<(ops node:$src),
[(ctlz_zero_undef node:$src),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 93313a8912a50..559fb041c424d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1359,12 +1359,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0, 32)
.widenScalarToNextPow2(1, 32);
- getActionDefinitionsBuilder(G_CTLS)
- .legalFor({{S32, S32}})
- .clampScalar(0, S32, S32)
- .clampScalar(1, S32, S32)
- .scalarize(0);
-
// S64 is only legal on SALU, and needs to be broken into 32-bit elements in
// RegBankSelect.
getActionDefinitionsBuilder(G_BITREVERSE)
@@ -2768,7 +2762,8 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
- auto LS = B.buildCTLS(S32, Unmerge.getReg(1));
+ auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
+ .addUse(Unmerge.getReg(1));
auto LS2 = B.buildSub(S32, LS, One);
ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
} else
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index fd946c36d4dbf..949cc69e2402c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4289,13 +4289,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
break;
}
- case AMDGPU::G_CTLS: {
- unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
- OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
- OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
- break;
- }
case AMDGPU::G_CTPOP: {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b82000e3c1968..e8891b6a603b4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -516,7 +516,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
- setOperationAction(ISD::CTLS, MVT::i32, Legal);
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
@@ -10191,7 +10190,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::amdgcn_sffbh:
- return DAG.getNode(ISD::CTLS, DL, VT, Op.getOperand(1));
+ return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_sbfe:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
index f6107900984b9..4cbdea64f1c00 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
@@ -123,7 +123,7 @@ body: |
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32)
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
@@ -145,7 +145,7 @@ body: |
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32)
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
@@ -467,7 +467,7 @@ body: |
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32)
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32)
@@ -490,7 +490,7 @@ body: |
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32)
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32)
@@ -524,7 +524,7 @@ body: |
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32)
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
@@ -548,7 +548,7 @@ body: |
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32)
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
@@ -585,7 +585,7 @@ body: |
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV3]](s32)
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
@@ -600,7 +600,7 @@ body: |
; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
- ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_CTLS [[UV7]](s32)
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]]
; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
@@ -630,7 +630,7 @@ body: |
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]]
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
- ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV3]](s32)
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
@@ -645,7 +645,7 @@ body: |
; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
- ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_CTLS [[UV7]](s32)
+ ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]]
; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
index 328ce6ce037b6..9896e5f4c8cae 100644
--- a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @select_constant_cttz(ptr addrspace(1) noalias %out, p
; GCN-NEXT: s_and_b64 s[6:7], s[4:5], exec
; GCN-NEXT: s_cselect_b32 s2, -1, s2
; GCN-NEXT: s_flbit_i32 s6, s2
-; GCN-NEXT: s_xor_b32 s8, s6, 31
+; GCN-NEXT: s_sub_i32 s8, 31, s6
; GCN-NEXT: s_cmp_eq_u32 s2, 0
; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
More information about the llvm-branch-commits
mailing list