[llvm] Optimize count leading ones if promoted type (PR #99591)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 04:23:39 PDT 2024
https://github.com/v01dXYZ updated https://github.com/llvm/llvm-project/pull/99591
From 83b525ef9bf37e2f6b9da56cd467bb312c1f0263 Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Tue, 9 Jul 2024 16:42:39 +0200
Subject: [PATCH] [CodeGen] Legalisation with promotion: optimise count leading
ones
(CTLZ (XOR Op -1))
-->
(CTLZ_ZERO_UNDEF (XOR (SHL (ANYEXTEND Op) ShiftAmount) -1))
where ShiftAmount is the bit-width difference between the promoted and the
original type (for example 24 when promoting i8 to i32). The optimisation also
applies to CTLZ_ZERO_UNDEF, VP_CTLZ and VP_CTLZ_ZERO_UNDEF.
Fixes https://github.com/llvm/llvm-project/issues/96455
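
For illustration, a minimal IR instance of the idiom (taken from the new
AArch64 test added below):

define i8 @ctlo_i8(i8 %x) {
  %tmp1 = xor i8 %x, -1
  %tmp2 = call i8 @llvm.ctlz.i8(i8 %tmp1, i1 false)
  ret i8 %tmp2
}

With i8 promoted to i32, the operand is any-extended and shifted left by 24,
and the XOR with -1 then fills the low 24 bits with ones. The intermediate
value can therefore never be zero, which permits CTLZ_ZERO_UNDEF and removes
the compensating subtraction of the extra leading bits.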
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 23 +++-
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 36 +++++++
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 45 ++++++++
.../SelectionDAG/LegalizeIntegerTypes.cpp | 49 +++++++++
llvm/test/CodeGen/AArch64/ctlo.ll | 100 ++++++++++++++++++
.../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 24 ++---
llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 52 +++++++++
llvm/test/CodeGen/X86/ctlo.ll | 26 ++---
8 files changed, 323 insertions(+), 32 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/ctlo.ll
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 22d0708f54786..37ba7d7b0b23e 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2298,8 +2298,29 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (match(CountZeros->getOperand(1), m_One()))
return false;
- // If it's cheap to speculate, there's nothing to do.
Type *Ty = CountZeros->getType();
+ EVT VTy = TLI->getValueType(*DL, Ty);
+
+  // Do not despeculate if we have (ctlz (xor op -1)) and the operand is
+  // promoted, as legalisation would later transform it to:
+  //
+  // (ctlz_zero_undef (xor (shl (anyext op) shiftamount)
+  //                       -1))
+  //
+  // Despeculation is not only useless here but also unwanted: with
+  // SelectionDAG the XOR and the CTLZ would end up in different basic blocks.
+ ConstantInt *C;
+ Value *Op0;
+ Value *Op1;
+ if ((TLI->getTypeAction(CountZeros->getContext(), VTy) ==
+ TargetLowering::TypePromoteInteger ||
+ TLI->getOperationAction(ISD::CTLZ, VTy) == TargetLowering::Promote) &&
+ match(CountZeros->getOperand(0), m_Xor(m_Value(Op0), m_Value(Op1))) &&
+ ((C = dyn_cast<ConstantInt>(Op0)) || (C = dyn_cast<ConstantInt>(Op1))) &&
+ C->isMinusOne())
+ return false;
+
+ // If it's cheap to speculate, there's nothing to do.
auto IntrinsicID = CountZeros->getIntrinsicID();
if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
(IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
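
(For context, not part of the patch: despeculation rewrites the count into a
zero test plus a guarded intrinsic call, roughly the following IR sketch with
hypothetical block names:

  %iszero = icmp eq i16 %not, 0
  br i1 %iszero, label %cond.end, label %cond.false

cond.false:                        ; the input is known non-zero here
  %clz = call i16 @llvm.ctlz.i16(i16 %not, i1 true)
  br label %cond.end

cond.end:
  %res = phi i16 [ 16, %entry ], [ %clz, %cond.false ]

The xor producing %not would stay in the entry block while the ctlz moves
below the branch, so legalisation could no longer match the combined idiom,
hence the early return added above.)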
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3f1094e0ac703..a334b14e0cc8b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2356,6 +2356,35 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
+static bool extendCtlzNot(const MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, LLT WideTy) {
+ Register XorSrc;
+ Register CstReg;
+ if (!mi_match(MI.getOperand(1).getReg(), MRI,
+ m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+ return false;
+
+  auto OptCst = getIConstantVRegValWithLookThrough(CstReg, MRI);
+
+  // Bail out unless the XOR mask is a constant all-ones value.
+  if (!OptCst || !OptCst->Value.isAllOnes())
+    return false;
+
+ auto AllOnes = MIRBuilder.buildConstant(
+ WideTy, APInt::getAllOnes(WideTy.getSizeInBits()));
+ auto Res = MIRBuilder.buildAnyExt(WideTy, XorSrc);
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT CurTy = MRI.getType(SrcReg);
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+ Res = MIRBuilder.buildShl(WideTy, Res,
+ MIRBuilder.buildConstant(WideTy, SizeDiff));
+ Res = MIRBuilder.buildXor(WideTy, Res, AllOnes);
+ Res = MIRBuilder.buildCTLZ_ZERO_UNDEF(MI.getOperand(0), Res);
+
+ return true;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
@@ -2449,6 +2478,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
LLT CurTy = MRI.getType(SrcReg);
unsigned NewOpc = MI.getOpcode();
+
+ if ((MI.getOpcode() == TargetOpcode::G_CTLZ ||
+ MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) &&
+ extendCtlzNot(MI, MIRBuilder, MRI, WideTy)) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
if (NewOpc == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d6a0dd9ae9b20..f062ad2543dd9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5049,6 +5049,40 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
return MidVT;
}
+// (CTLZ (XOR Op -1)) --> (TRUNCATE (CTLZ_ZERO_UNDEF
+//                                    (XOR (SHL (ANYEXTEND Op)
+//                                              ShiftAmount)
+//                                         -1)))
+static bool ExtendCtlzNot(SDNode *Node, SDValue &Result, SDLoc &dl, MVT OVT,
+ MVT NVT, SelectionDAG &DAG) {
+ SDValue NotOp = Node->getOperand(0);
+ if (NotOp.getOpcode() != ISD::XOR)
+ return false;
+
+ SDValue SrcOp = NotOp->getOperand(0);
+ SDValue CstOp = NotOp->getOperand(1);
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOp);
+
+ if (!Cst || !Cst->isAllOnes())
+ return false;
+
+ auto ExtSrc = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, SrcOp);
+ unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ auto ShiftConst =
+ DAG.getShiftAmountConstant(SHLAmount, ExtSrc.getValueType(), dl);
+ SDValue NSrcOp = DAG.getNode(ISD::SHL, dl, NVT, ExtSrc, ShiftConst);
+
+ SDValue NCstOp =
+ DAG.getConstant(APInt::getAllOnes(NVT.getScalarSizeInBits()), dl, NVT);
+
+ Result = DAG.getNode(NotOp->getOpcode(), dl, NVT, NSrcOp, NCstOp,
+ NotOp->getFlags());
+ Result = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Result);
+ Result = DAG.getNode(ISD::TRUNCATE, dl, OVT, Result);
+ return true;
+}
+
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to promote node\n");
SmallVector<SDValue, 8> Results;
@@ -5084,6 +5118,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
case ISD::CTPOP: {
+  // If the operand of the CTLZ is a NOT, sink the extend into the NOT.
+ if (Node->getOpcode() == ISD::CTLZ &&
+ ExtendCtlzNot(Node, Tmp1, dl, OVT, NVT, DAG)) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
// Zero extend the argument unless its cttz, then use any_extend.
if (Node->getOpcode() == ISD::CTTZ ||
Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
@@ -5115,6 +5156,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
}
case ISD::CTLZ_ZERO_UNDEF: {
+ if (ExtendCtlzNot(Node, Tmp1, dl, OVT, NVT, DAG)) {
+ Results.push_back(Tmp1);
+ break;
+ }
// We know that the argument is unlikely to be zero, hence we can take a
// different approach as compared to ISD::CTLZ
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index fed5ebcc3c903..e55fed038f576 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -638,6 +638,47 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
return Result;
}
+// (CTLZ (XOR Op -1)) --> (CTLZ_ZERO_UNDEF (XOR (SHL (ANYEXTEND Op)
+//                                                   ShiftAmount)
+//                                              -1))
+static bool ExtendCtlzNot(SDNode *Node, SDValue &Result, SDLoc &dl, EVT OVT,
+ EVT NVT, SelectionDAG &DAG) {
+ SDValue NotOp = Node->getOperand(0);
+ if (NotOp.getOpcode() != ISD::XOR)
+ return false;
+
+ SDValue SrcOp = NotOp->getOperand(0);
+ SDValue CstOp = NotOp->getOperand(1);
+
+ if (!isAllOnesOrAllOnesSplat(CstOp))
+ return false;
+
+ auto ExtSrc = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, SrcOp);
+ unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ auto ShiftConst =
+ DAG.getShiftAmountConstant(SHLAmount, ExtSrc.getValueType(), dl);
+
+ SDValue NCstOp =
+ DAG.getConstant(APInt::getAllOnes(NVT.getScalarSizeInBits()), dl, NVT);
+ if (!Node->isVPOpcode()) {
+ SDValue NSrcOp = DAG.getNode(ISD::SHL, dl, NVT, ExtSrc, ShiftConst);
+
+ Result = DAG.getNode(ISD::XOR, dl, NVT, NSrcOp, NCstOp);
+ Result = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Result);
+ } else {
+ SDValue Mask = Node->getOperand(1);
+ SDValue EVL = Node->getOperand(2);
+
+ SDValue NSrcOp =
+ DAG.getNode(ISD::VP_SHL, dl, NVT, ExtSrc, ShiftConst, Mask, EVL);
+
+ Result = DAG.getNode(ISD::VP_XOR, dl, NVT, NSrcOp, NCstOp, Mask, EVL);
+ Result = DAG.getNode(ISD::VP_CTLZ_ZERO_UNDEF, dl, NVT, Result, Mask, EVL);
+ }
+
+ return true;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
@@ -656,6 +697,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
unsigned CtlzOpcode = N->getOpcode();
+  // If the operand of the CTLZ is a NOT, sink the extend into the NOT.
+ if (SDValue Res;
+ (CtlzOpcode == ISD::CTLZ || CtlzOpcode == ISD::CTLZ_ZERO_UNDEF ||
+ CtlzOpcode == ISD::VP_CTLZ || CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) &&
+ ExtendCtlzNot(N, Res, dl, OVT, NVT, DAG)) {
+ return Res;
+ }
+
if (CtlzOpcode == ISD::CTLZ || CtlzOpcode == ISD::VP_CTLZ) {
// Subtract off the extra leading bits in the bigger type.
SDValue ExtractLeadingBits = DAG.getConstant(
diff --git a/llvm/test/CodeGen/AArch64/ctlo.ll b/llvm/test/CodeGen/AArch64/ctlo.ll
new file mode 100644
index 0000000000000..5f15f540f458d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ctlo.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mtriple=aarch64 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s --mtriple=aarch64 -global-isel -verify-machineinstrs | FileCheck %s
+
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @ctlo_i8(i8 %x) {
+; CHECK-LABEL: ctlo_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: eor w8, w8, w0, lsl #24
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+ %tmp1 = xor i8 %x, -1
+ %tmp2 = call i8 @llvm.ctlz.i8( i8 %tmp1, i1 false )
+ ret i8 %tmp2
+}
+
+define i8 @ctlo_i8_undef(i8 %x) {
+; CHECK-LABEL: ctlo_i8_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: eor w8, w8, w0, lsl #24
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+ %tmp1 = xor i8 %x, -1
+ %tmp2 = call i8 @llvm.ctlz.i8( i8 %tmp1, i1 true )
+ ret i8 %tmp2
+}
+
+define i16 @ctlo_i16(i16 %x) {
+; CHECK-LABEL: ctlo_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: eor w8, w8, w0, lsl #16
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+ %tmp1 = xor i16 %x, -1
+ %tmp2 = call i16 @llvm.ctlz.i16( i16 %tmp1, i1 false )
+ ret i16 %tmp2
+}
+
+define i16 @ctlo_i16_undef(i16 %x) {
+; CHECK-LABEL: ctlo_i16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: eor w8, w8, w0, lsl #16
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+ %tmp1 = xor i16 %x, -1
+ %tmp2 = call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true )
+ ret i16 %tmp2
+}
+
+define i32 @ctlo_i32(i32 %x) {
+; CHECK-LABEL: ctlo_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+ %tmp1 = xor i32 %x, -1
+ %tmp2 = call i32 @llvm.ctlz.i32( i32 %tmp1, i1 false )
+ ret i32 %tmp2
+}
+
+define i32 @ctlo_i32_undef(i32 %x) {
+; CHECK-LABEL: ctlo_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+ %tmp1 = xor i32 %x, -1
+ %tmp2 = call i32 @llvm.ctlz.i32( i32 %tmp1, i1 true )
+ ret i32 %tmp2
+}
+
+define i64 @ctlo_i64(i64 %x) {
+; CHECK-LABEL: ctlo_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn x8, x0
+; CHECK-NEXT: clz x0, x8
+; CHECK-NEXT: ret
+ %tmp1 = xor i64 %x, -1
+ %tmp2 = call i64 @llvm.ctlz.i64( i64 %tmp1, i1 false )
+ ret i64 %tmp2
+}
+
+define i64 @ctlo_i64_undef(i64 %x) {
+; CHECK-LABEL: ctlo_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn x8, x0
+; CHECK-NEXT: clz x0, x8
+; CHECK-NEXT: ret
+ %tmp1 = xor i64 %x, -1
+ %tmp2 = call i64 @llvm.ctlz.i64( i64 %tmp1, i1 true )
+ ret i64 %tmp2
+}
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index f17cec231f323..e993ecfcdf3b8 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -89,18 +89,14 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
define i8 @test_not_ctlz_i8(i8 %a) nounwind {
; LA32-LABEL: test_not_ctlz_i8:
; LA32: # %bb.0:
-; LA32-NEXT: ori $a1, $zero, 255
-; LA32-NEXT: andn $a0, $a1, $a0
-; LA32-NEXT: clz.w $a0, $a0
-; LA32-NEXT: addi.w $a0, $a0, -24
+; LA32-NEXT: slli.w $a0, $a0, 24
+; LA32-NEXT: clo.w $a0, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: test_not_ctlz_i8:
; LA64: # %bb.0:
-; LA64-NEXT: ori $a1, $zero, 255
-; LA64-NEXT: andn $a0, $a1, $a0
-; LA64-NEXT: clz.d $a0, $a0
-; LA64-NEXT: addi.d $a0, $a0, -56
+; LA64-NEXT: slli.d $a0, $a0, 56
+; LA64-NEXT: clo.d $a0, $a0
; LA64-NEXT: ret
%neg = xor i8 %a, -1
%tmp = call i8 @llvm.ctlz.i8(i8 %neg, i1 false)
@@ -110,18 +106,14 @@ define i8 @test_not_ctlz_i8(i8 %a) nounwind {
define i16 @test_not_ctlz_i16(i16 %a) nounwind {
; LA32-LABEL: test_not_ctlz_i16:
; LA32: # %bb.0:
-; LA32-NEXT: nor $a0, $a0, $zero
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: clz.w $a0, $a0
-; LA32-NEXT: addi.w $a0, $a0, -16
+; LA32-NEXT: slli.w $a0, $a0, 16
+; LA32-NEXT: clo.w $a0, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: test_not_ctlz_i16:
; LA64: # %bb.0:
-; LA64-NEXT: nor $a0, $a0, $zero
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: clz.d $a0, $a0
-; LA64-NEXT: addi.d $a0, $a0, -48
+; LA64-NEXT: slli.d $a0, $a0, 48
+; LA64-NEXT: clo.d $a0, $a0
; LA64-NEXT: ret
%neg = xor i16 %a, -1
%tmp = call i16 @llvm.ctlz.i16(i16 %neg, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 58882525e55c4..6f89489bb39d6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2624,6 +2624,58 @@ define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
%v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i9> %v
}
+define <vscale x 1 x i9> @vp_ctlo_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlo_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlo_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vsll.vi v8, v8, 7, v0.t
+; CHECK-ZVBB-NEXT: vnot.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %va.not = xor <vscale x 1 x i9> %va, splat (i9 -1)
+ %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va.not, i1 false, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
+define <vscale x 1 x i9> @vp_ctlo_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlo_zero_undef_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlo_zero_undef_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vsll.vi v8, v8, 7, v0.t
+; CHECK-ZVBB-NEXT: vnot.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %va.not = xor <vscale x 1 x i9> %va, splat (i9 -1)
+ %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va.not, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll
index 7431f94f0fdf2..020d6d1b80136 100644
--- a/llvm/test/CodeGen/X86/ctlo.ll
+++ b/llvm/test/CodeGen/X86/ctlo.ll
@@ -46,20 +46,18 @@ define i8 @ctlo_i8(i8 %x) {
;
; X86-CLZ-LABEL: ctlo_i8:
; X86-CLZ: # %bb.0:
-; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-CLZ-NEXT: notb %al
-; X86-CLZ-NEXT: movzbl %al, %eax
+; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT: shll $24, %eax
+; X86-CLZ-NEXT: notl %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
-; X86-CLZ-NEXT: addl $-24, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlo_i8:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: notb %dil
-; X64-CLZ-NEXT: movzbl %dil, %eax
-; X64-CLZ-NEXT: lzcntl %eax, %eax
-; X64-CLZ-NEXT: addl $-24, %eax
+; X64-CLZ-NEXT: shll $24, %edi
+; X64-CLZ-NEXT: notl %edi
+; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
%tmp1 = xor i8 %x, -1
@@ -89,20 +87,18 @@ define i8 @ctlo_i8_undef(i8 %x) {
;
; X86-CLZ-LABEL: ctlo_i8_undef:
; X86-CLZ: # %bb.0:
-; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-CLZ-NEXT: notb %al
-; X86-CLZ-NEXT: movzbl %al, %eax
+; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: shll $24, %eax
+; X86-CLZ-NEXT: notl %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlo_i8_undef:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: notb %dil
-; X64-CLZ-NEXT: movzbl %dil, %eax
-; X64-CLZ-NEXT: shll $24, %eax
-; X64-CLZ-NEXT: lzcntl %eax, %eax
+; X64-CLZ-NEXT: shll $24, %edi
+; X64-CLZ-NEXT: notl %edi
+; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
%tmp1 = xor i8 %x, -1