[llvm] Added InvertedFPClassTest in legalizerHelper (PR #133403)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 28 02:48:21 PDT 2025
https://github.com/sumesh-s-mcw created https://github.com/llvm/llvm-project/pull/133403
None
>From a21beec2729252d4b9f665fbaa3978a2b1b01cd2 Mon Sep 17 00:00:00 2001
From: sumesh-s-mcw <sumesh.suresh at multicorewareinc.com>
Date: Fri, 28 Mar 2025 14:32:53 +0530
Subject: [PATCH] Added InvertedFPClassTest in legalizerHelper
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 283 ++++++++++--------
1 file changed, 160 insertions(+), 123 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a9f80860124fb..0a2a281cd5b50 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -53,8 +53,8 @@ using namespace MIPatternMatch;
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
-static std::pair<int, int>
-getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+static std::pair<int, int> getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy,
+ LLT &LeftoverTy) {
assert(!LeftoverTy.isValid() && "this is an out argument");
unsigned Size = OrigTy.getSizeInBits();
@@ -160,10 +160,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
}
}
-void LegalizerHelper::insertParts(Register DstReg,
- LLT ResultTy, LLT PartTy,
- ArrayRef<Register> PartRegs,
- LLT LeftoverTy,
+void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy,
+ ArrayRef<Register> PartRegs, LLT LeftoverTy,
ArrayRef<Register> LeftoverRegs) {
if (!LeftoverTy.isValid()) {
assert(LeftoverRegs.empty());
@@ -278,7 +276,7 @@ LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
// Shift the sign bit of the low register through the high register.
auto ShiftAmt =
- MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
+ MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
}
}
@@ -902,8 +900,8 @@ conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
&MI);
}
-static RTLIB::Libcall
-getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
+static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI,
+ const TargetLowering &TLI) {
RTLIB::Libcall RTLibcall;
switch (MI.getOpcode()) {
case TargetOpcode::G_GET_FPENV:
@@ -1318,8 +1316,10 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
}
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
- Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
- Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
+ Type *FromTy =
+ getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
+ Type *ToTy =
+ getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
if (!FromTy || !ToTy)
return UnableToLegalize;
LegalizeResult Status =
@@ -1481,13 +1481,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (LeftoverBits != 0) {
LeftoverTy = LLT::scalar(LeftoverBits);
auto K = MIRBuilder.buildConstant(
- LeftoverTy,
- Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
+ LeftoverTy, Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
LeftoverRegs.push_back(K.getReg(0));
}
- insertParts(MI.getOperand(0).getReg(),
- Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
+ insertParts(MI.getOperand(0).getReg(), Ty, NarrowTy, PartRegs, LeftoverTy,
+ LeftoverRegs);
MI.eraseFromParent();
return Legalized;
@@ -2109,8 +2108,9 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
- MRI.createGenericVirtualRegister(WideTy);
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy
+ ? DstReg
+ : MRI.createGenericVirtualRegister(WideTy);
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
@@ -2355,8 +2355,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
if (Offset == 0) {
// Avoid a shift in the degenerate case.
- MIRBuilder.buildTrunc(DstReg,
- MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
+ MIRBuilder.buildTrunc(DstReg, MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
MI.eraseFromParent();
return Legalized;
}
@@ -2368,8 +2367,8 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
ShiftTy = WideTy;
}
- auto LShr = MIRBuilder.buildLShr(
- ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
+ auto LShr = MIRBuilder.buildLShr(ShiftTy, Src,
+ MIRBuilder.buildConstant(ShiftTy, Offset));
MIRBuilder.buildTrunc(DstReg, LShr);
MI.eraseFromParent();
return Legalized;
@@ -2707,8 +2706,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// the top of the original type.
auto TopBit =
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
- MIBSrc = MIRBuilder.buildOr(
- WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+ MIBSrc = MIRBuilder.buildOr(WideTy, MIBSrc,
+ MIRBuilder.buildConstant(WideTy, TopBit));
// Now we know the operand is non-zero, use the more relaxed opcode.
NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
@@ -3045,8 +3044,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changingInstr(MI);
- unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
- TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
+ unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? TargetOpcode::G_ZEXT
+ : TargetOpcode::G_ANYEXT;
widenScalarSrc(MI, WideTy, 0, ExtType);
Observer.changedInstr(MI);
@@ -3560,8 +3559,9 @@ static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
auto OffsetMask = B.buildConstant(
IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
- return B.buildShl(IdxTy, OffsetIdx,
- B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
+ return B
+ .buildShl(IdxTy, OffsetIdx, B.buildConstant(IdxTy, Log2_32(OldEltSize)))
+ .getReg(0);
}
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
@@ -3614,7 +3614,8 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
- auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
+ auto Elt =
+ MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
NewOps[I] = Elt.getReg(0);
}
@@ -3655,13 +3656,14 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
Register WideElt = CastVec;
if (CastTy.isVector()) {
- WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
- ScaledIdx).getReg(0);
+ WideElt =
+ MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, ScaledIdx)
+ .getReg(0);
}
// Compute the bit offset into the register of the target element.
Register OffsetBits = getBitcastWiderVectorElementOffset(
- MIRBuilder, Idx, NewEltSize, OldEltSize);
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
// Shift the wide element to get the target element.
auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
@@ -3677,18 +3679,17 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
/// TargetReg, while preserving other bits in \p TargetReg.
///
/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
-static Register buildBitFieldInsert(MachineIRBuilder &B,
- Register TargetReg, Register InsertReg,
- Register OffsetBits) {
+static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg,
+ Register InsertReg, Register OffsetBits) {
LLT TargetTy = B.getMRI()->getType(TargetReg);
LLT InsertTy = B.getMRI()->getType(InsertReg);
auto ZextVal = B.buildZExt(TargetTy, InsertReg);
auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
// Produce a bitmask of the value to insert
- auto EltMask = B.buildConstant(
- TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
- InsertTy.getSizeInBits()));
+ auto EltMask =
+ B.buildConstant(TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
+ InsertTy.getSizeInBits()));
// Shift it into position
auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
@@ -3743,19 +3744,22 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
Register ExtractedElt = CastVec;
if (CastTy.isVector()) {
- ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
- ScaledIdx).getReg(0);
+ ExtractedElt =
+ MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, ScaledIdx)
+ .getReg(0);
}
// Compute the bit offset into the register of the target element.
Register OffsetBits = getBitcastWiderVectorElementOffset(
- MIRBuilder, Idx, NewEltSize, OldEltSize);
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
- Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
- Val, OffsetBits);
+ Register InsertedElt =
+ buildBitFieldInsert(MIRBuilder, ExtractedElt, Val, OffsetBits);
if (CastTy.isVector()) {
- InsertedElt = MIRBuilder.buildInsertVectorElement(
- CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
+ InsertedElt =
+ MIRBuilder
+ .buildInsertVectorElement(CastTy, CastVec, InsertedElt, ScaledIdx)
+ .getReg(0);
}
MIRBuilder.buildBitcast(Dst, InsertedElt);
@@ -4196,15 +4200,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
// Generate the PtrAdd and truncating stores.
LLT PtrTy = MRI.getType(PtrReg);
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
+ auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
+ LargeSplitSize / 8);
+ auto SmallPtr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO =
- MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
StoreMI.eraseFromParent();
@@ -4338,16 +4341,16 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
// Legalize an instruction by changing the opcode in place.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
- Observer.changingInstr(MI);
- MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
- Observer.changedInstr(MI);
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
+ Observer.changedInstr(MI);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
using namespace TargetOpcode;
- switch(MI.getOpcode()) {
+ switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_FCONSTANT:
@@ -4606,7 +4609,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
LLT DstTy = MRI.getType(DstReg);
Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
- auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
+ auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() -
+ SizeInBits);
MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
MI.eraseFromParent();
@@ -4689,7 +4693,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerEXT(MI);
case G_TRUNC:
return lowerTRUNC(MI);
- GISEL_VECREDUCE_CASES_NONSEQ
+ GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
case G_VAARG:
return lowerVAArg(MI);
@@ -5232,7 +5236,8 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
LLT LeftoverTy;
SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
if (IsLoad) {
- std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+ std::tie(NumParts, NumLeftover) =
+ getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
} else {
if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
NarrowLeftoverRegs, MIRBuilder, MRI)) {
@@ -5291,8 +5296,8 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
if (IsLoad) {
- insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
- LeftoverTy, NarrowLeftoverRegs);
+ insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, LeftoverTy,
+ NarrowLeftoverRegs);
}
LdStMI.eraseFromParent();
@@ -5456,7 +5461,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
case G_SEXT_INREG:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
- GISEL_VECREDUCE_CASES_NONSEQ
+ GISEL_VECREDUCE_CASES_NONSEQ
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
@@ -5772,7 +5777,7 @@ LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
// one NarrowTy size value left.
while (SplitSrcs.size() > 1) {
SmallVector<Register> PartialRdxs;
- for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
+ for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
Register LHS = SplitSrcs[Idx];
Register RHS = SplitSrcs[Idx + 1];
// Create the intermediate vector op.
@@ -5789,9 +5794,8 @@ LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
- const LLT HalfTy, const LLT AmtTy) {
+LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarShiftByConstant(
+ MachineInstr &MI, const APInt &Amt, const LLT HalfTy, const LLT AmtTy) {
Register InL = MRI.createGenericVirtualRegister(HalfTy);
Register InH = MRI.createGenericVirtualRegister(HalfTy);
@@ -5961,13 +5965,13 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
// Long: ShAmt >= NewBitSize
MachineInstrBuilder HiL;
if (MI.getOpcode() == TargetOpcode::G_LSHR) {
- HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
} else {
auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
- HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
+ HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
}
auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
- {InH, AmtExcess}); // Lo from Hi part.
+ {InH, AmtExcess}); // Lo from Hi part.
auto Lo = MIRBuilder.buildSelect(
HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
@@ -6747,7 +6751,7 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
InsertOffset = OpStart - DstStart;
ExtractOffset = 0;
SegSize =
- std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
}
Register SegReg = OpReg;
@@ -6798,19 +6802,18 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
- {Src0Regs[I], Src1Regs[I]});
+ {Src0Regs[I], Src1Regs[I]});
DstRegs.push_back(Inst.getReg(0));
}
for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
- auto Inst = MIRBuilder.buildInstr(
- MI.getOpcode(),
- {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
+ auto Inst =
+ MIRBuilder.buildInstr(MI.getOpcode(), {LeftoverTy},
+ {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
DstLeftoverRegs.push_back(Inst.getReg(0));
}
- insertParts(DstReg, DstTy, NarrowTy, DstRegs,
- LeftoverTy, DstLeftoverRegs);
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs, LeftoverTy, DstLeftoverRegs);
MI.eraseFromParent();
return Legalized;
@@ -6830,7 +6833,8 @@ LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 8> Parts;
LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
+ LLT LCMTy =
+ buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
MI.eraseFromParent();
@@ -6865,19 +6869,18 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
llvm_unreachable("inconsistent extractParts result");
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
- auto Select = MIRBuilder.buildSelect(NarrowTy,
- CondReg, Src1Regs[I], Src2Regs[I]);
+ auto Select =
+ MIRBuilder.buildSelect(NarrowTy, CondReg, Src1Regs[I], Src2Regs[I]);
DstRegs.push_back(Select.getReg(0));
}
for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
auto Select = MIRBuilder.buildSelect(
- LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
+ LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
DstLeftoverRegs.push_back(Select.getReg(0));
}
- insertParts(DstReg, DstTy, NarrowTy, DstRegs,
- LeftoverTy, DstLeftoverRegs);
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs, LeftoverTy, DstLeftoverRegs);
MI.eraseFromParent();
return Legalized;
@@ -6901,9 +6904,8 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
auto C_0 = B.buildConstant(NarrowTy, 0);
auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
UnmergeSrc.getReg(1), C_0);
- auto LoCTLZ = IsUndef ?
- B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
- B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
+ auto LoCTLZ = IsUndef ? B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0))
+ : B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
@@ -6934,9 +6936,8 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
auto C_0 = B.buildConstant(NarrowTy, 0);
auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
UnmergeSrc.getReg(0), C_0);
- auto HiCTTZ = IsUndef ?
- B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
- B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
+ auto HiCTTZ = IsUndef ? B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1))
+ : B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
@@ -7140,7 +7141,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
- assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
+ assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
// 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
// bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
@@ -7740,8 +7741,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
- auto SignMask = MIRBuilder.buildConstant(SrcTy,
- APInt::getSignMask(SrcEltBits));
+ auto SignMask =
+ MIRBuilder.buildConstant(SrcTy, APInt::getSignMask(SrcEltBits));
auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
@@ -7763,8 +7764,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
const LLT S1 = LLT::scalar(1);
- auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
- S1, Exponent, ExponentLoBit);
+ auto CmpGt =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, Exponent, ExponentLoBit);
R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
@@ -7773,8 +7774,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
- auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
- S1, Exponent, ZeroSrcTy);
+ auto ExponentLt0 =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, Exponent, ZeroSrcTy);
auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
@@ -7918,13 +7919,13 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
// Subtract the fp64 exponent bias (1023) to get the real exponent and
// add the f16 bias (15) to get the biased exponent for the f16 format.
E = MIRBuilder.buildAdd(
- S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
+ S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
- auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
- MIRBuilder.buildConstant(S32, 0x1ff));
+ auto MaskedSig =
+ MIRBuilder.buildAnd(S32, UH, MIRBuilder.buildConstant(S32, 0x1ff));
MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
auto Zero = MIRBuilder.buildConstant(S32, 0);
@@ -7950,14 +7951,14 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
- auto SigSetHigh = MIRBuilder.buildOr(S32, M,
- MIRBuilder.buildConstant(S32, 0x1000));
+ auto SigSetHigh =
+ MIRBuilder.buildOr(S32, M, MIRBuilder.buildConstant(S32, 0x1000));
auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
auto D0 = MIRBuilder.buildShl(S32, D, B);
- auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
- D0, SigSetHigh);
+ auto D0_NE_SigSetHigh =
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, D0, SigSetHigh);
auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
D = MIRBuilder.buildOr(S32, D, D1);
@@ -7978,13 +7979,13 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
V1 = MIRBuilder.buildOr(S32, V0, V1);
V = MIRBuilder.buildAdd(S32, V, V1);
- auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
- E, MIRBuilder.buildConstant(S32, 30));
+ auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, E,
+ MIRBuilder.buildConstant(S32, 30));
V = MIRBuilder.buildSelect(S32, CmpEGt30,
MIRBuilder.buildConstant(S32, 0x7c00), V);
- auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
- E, MIRBuilder.buildConstant(S32, 1039));
+ auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, E,
+ MIRBuilder.buildConstant(S32, 1039));
V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
// Extract the sign bit.
@@ -8102,11 +8103,11 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
const int Src0Size = Src0Ty.getScalarSizeInBits();
const int Src1Size = Src1Ty.getScalarSizeInBits();
- auto SignBitMask = MIRBuilder.buildConstant(
- Src0Ty, APInt::getSignMask(Src0Size));
+ auto SignBitMask =
+ MIRBuilder.buildConstant(Src0Ty, APInt::getSignMask(Src0Size));
auto NotSignBitMask = MIRBuilder.buildConstant(
- Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+ Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
Register And1;
@@ -8140,8 +8141,9 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
- unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
- TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+ unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM
+ ? TargetOpcode::G_FMINNUM_IEEE
+ : TargetOpcode::G_FMAXNUM_IEEE;
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
@@ -8173,8 +8175,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
LLT Ty = MRI.getType(DstReg);
unsigned Flags = MI.getFlags();
- auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
- Flags);
+ auto Mul =
+ MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), Flags);
MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
MI.eraseFromParent();
return Legalized;
@@ -8227,10 +8229,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
- auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
- SrcReg, Zero, Flags);
- auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
- SrcReg, Trunc, Flags);
+ auto Lt0 =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy, SrcReg, Zero, Flags);
+ auto NeTrunc =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy, SrcReg, Trunc, Flags);
auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
@@ -8254,8 +8256,9 @@ LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
Register SrcReg = MI.getOperand(I).getReg();
auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
- MRI.createGenericVirtualRegister(WideTy);
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy
+ ? DstReg
+ : MRI.createGenericVirtualRegister(WideTy);
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
@@ -8265,7 +8268,7 @@ LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
if (DstTy.isPointer()) {
if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
- DstTy.getAddressSpace())) {
+ DstTy.getAddressSpace())) {
LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
return UnableToLegalize;
}
@@ -8907,8 +8910,7 @@ LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerShlSat(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerShlSat(MachineInstr &MI) {
assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
"Expected shlsat opcode!");
@@ -9048,7 +9050,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
Register ValReg = MI.getOperand(ValRegIndex).getReg();
const LLT Ty = MRI.getType(ValReg);
const MDString *RegStr = cast<MDString>(
- cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
+ cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
if (!PhysReg.isValid())
@@ -9102,7 +9104,40 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
// TODO: Try inverting the test with getInvertedFPClassTest like the DAG
- // version
+ auto GetInvertedFPClassTest =
+ [](const llvm::FPClassTest Test) -> llvm::FPClassTest {
+ llvm::FPClassTest InvertedTest = ~Test & fcAllFlags;
+ switch (InvertedTest) {
+ default:
+ break;
+ case fcNan:
+ case fcSNan:
+ case fcQNan:
+ case fcInf:
+ case fcPosInf:
+ case fcNegInf:
+ case fcNormal:
+ case fcPosNormal:
+ case fcNegNormal:
+ case fcSubnormal:
+ case fcPosSubnormal:
+ case fcNegSubnormal:
+ case fcZero:
+ case fcPosZero:
+ case fcNegZero:
+ case fcFinite:
+ case fcPosFinite:
+ case fcNegFinite:
+ return InvertedTest;
+ }
+ return fcNone;
+ };
+ bool isInverted = false;
+ FPClassTest InvertedMask = GetInvertedFPClassTest(Mask);
+ if (InvertedMask != fcNone) {
+ isInverted = true;
+ Mask = InvertedMask;
+ }
unsigned BitSize = SrcTy.getScalarSizeInBits();
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
@@ -9253,7 +9288,9 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
appendToRes(NormalRes);
}
-
+ if(isInverted){
+ Res = MIRBuilder.buildNot(DstTy, Res);
+ }
MIRBuilder.buildCopy(DstReg, Res);
MI.eraseFromParent();
return Legalized;
More information about the llvm-commits
mailing list