[llvm] 6527b2a - [AMDGPU][NFC] Fix typos
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 18 06:05:44 PST 2022
Author: Sebastian Neubauer
Date: 2022-02-18T15:05:21+01:00
New Revision: 6527b2a4d5fadd0743e13797968c49ec30112dfe
URL: https://github.com/llvm/llvm-project/commit/6527b2a4d5fadd0743e13797968c49ec30112dfe
DIFF: https://github.com/llvm/llvm-project/commit/6527b2a4d5fadd0743e13797968c49ec30112dfe.diff
LOG: [AMDGPU][NFC] Fix typos
Fix some typos in the amdgpu backend.
Differential Revision: https://reviews.llvm.org/D119235
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
llvm/lib/Target/AMDGPU/GCNRegPressure.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/lib/Target/AMDGPU/R600Packetizer.cpp
llvm/lib/Target/AMDGPU/R600TargetMachine.h
llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 1920684d8f1fd..94d7844e8a328 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -877,7 +877,7 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
return getMul64(Builder, LHS, RHS).second;
}
-/// Figure out how many bits are really needed for this ddivision. \p AtLeast is
+/// Figure out how many bits are really needed for this division. \p AtLeast is
/// an optimization hint to bypass the second ComputeNumSignBits call if we the
/// first one is insufficient. Returns -1 on failure.
int AMDGPUCodeGenPrepare::getDivNumBits(BinaryOperator &I,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 786fc54c466cb..f2b39d68b8572 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -123,7 +123,7 @@ def gi_smrd_buffer_imm32 :
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
-// directly before before selecting a glue-less load, so hide this
+// directly before selecting a glue-less load, so hide this
// distinction.
def : GINodeEquiv<G_LOAD, AMDGPUld_glue> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8236e6672247b..dc105dad27ce8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2840,7 +2840,7 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
}
}
}
- // If "AllUsesAcceptSReg == false" so far we haven't suceeded
+ // If "AllUsesAcceptSReg == false" so far we haven't succeeded
// commuting current user. This means have at least one use
// that strictly require VGPR. Thus, we will not attempt to commute
// other user instructions.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b7d0f0580cda0..533b32e94dcf8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1627,7 +1627,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
// The legalizer preprocessed the intrinsic arguments. If we aren't using
- // NSA, these should have beeen packed into a single value in the first
+ // NSA, these should have been packed into a single value in the first
// address register
const bool UseNSA = NumVAddrRegs != 1 && NumVAddrDwords == NumVAddrRegs;
if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index a1a69030df8d4..0404193d3ae6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1510,7 +1510,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampMaxNumElements(1, S16, 2) // TODO: Make 4?
.clampMaxNumElements(0, S16, 64);
- // TODO: Don't fully scalarize v2s16 pieces? Or combine out thosse
+ // TODO: Don't fully scalarize v2s16 pieces? Or combine out those
// pre-legalize.
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
@@ -4377,7 +4377,7 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
///
/// We don't want to directly select image instructions just yet, but also want
/// to exposes all register repacking to the legalizer/combiners. We also don't
-/// want a selected instrution entering RegBankSelect. In order to avoid
+/// want a selected instruction entering RegBankSelect. In order to avoid
/// defining a multitude of intermediate image instructions, directly hack on
/// the intrinsic's arguments. In cases like a16 addresses, this requires
/// padding now unnecessary arguments with $noreg.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index c34c12ab9fecb..4519d2a1c7bdb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -73,7 +73,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
Align MaxAlign;
- // FIXME: Alignment is broken broken with explicit arg offset.;
+ // FIXME: Alignment is broken with explicit arg offset.;
const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
if (TotalKernArgSize == 0)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index d8133ca052bf0..75cfd124cd070 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -14,7 +14,7 @@
// known address. AMDGPUMachineFunction allocates the LDS global.
//
// Local variables with constant annotation or non-undef initializer are passed
-// through unchanged for simplication or error diagnostics in later passes.
+// through unchanged for simplification or error diagnostics in later passes.
//
// To reduce the memory overhead variables that are only used by kernels are
// excluded from this transform. The analysis to determine whether a variable
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 4e2f98d2a5dbc..d837f8cb2f60d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1295,7 +1295,7 @@ static void fixRegionTerminator(RegionMRT *Region) {
}
}
-// If a region region is just a sequence of regions (and the exit
+// If a region is just a sequence of regions (and the exit
// block in the case of the top level region), we can simply skip
// linearizing it, because it is already linear
bool regionIsSequence(RegionMRT *Region) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 5a5a5d213a1a2..3ddfab1b670ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -71,7 +71,7 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() {
return new AMDGPUOpenCLEnqueuedBlockLowering();
}
-/// Collect direct or indrect callers of \p F and save them
+/// Collect direct or indirect callers of \p F and save them
/// to \p Callers.
static void collectCallers(Function *F, DenseSet<Function *> &Callers) {
for (auto U : F->users()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 99b7ffb338845..85bcb3c7d0982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -917,7 +917,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// usage order.
//
// FIXME: It is also possible that if we're allowed to use all of the memory
- // could could end up using more than the maximum due to alignment padding.
+ // could end up using more than the maximum due to alignment padding.
uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index f2b5beaa40790..8f14d0a451dfd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1789,7 +1789,7 @@ bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
}
/// Utility function for pushing dynamic vector indexes with a constant offset
-/// into waterwall loops.
+/// into waterfall loops.
static void reinsertVectorIndexAdd(MachineIRBuilder &B,
MachineInstr &IdxUseInstr,
unsigned OpIdx,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index dd3676f3b707a..5d80ec6eb5673 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// The AMDGPU TargetMachine interface definition for hw codgen targets.
+/// The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 1736c078eb83b..0e3f734fa1e7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -1487,8 +1487,7 @@ int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
MachineBasicBlock *
AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
MachineBasicBlock *PredMBB) {
- assert(PredMBB->isSuccessor(MBB) &&
- "succBlk is not a prececessor of curBlk");
+ assert(PredMBB->isSuccessor(MBB) && "succBlk is not a predecessor of curBlk");
MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
diff --git a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
index 654153ea51513..8e5f966b7c6c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -142,7 +142,7 @@ enum amd_code_property_mask_t {
/// is provided to the finalizer when it is invoked and is recorded
/// here. The hardware will interleave the memory requests of each
/// lane of a wavefront by this element size to ensure each
- /// work-item gets a distinct memory memory location. Therefore, the
+ /// work-item gets a distinct memory location. Therefore, the
/// finalizer ensures that all load and store operations done to
/// private memory do not exceed this size. For example, if the
/// element size is 4 (32-bits or dword) and a 64-bit value must be
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d348a4c7e9091..ff99d1f57b919 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1930,7 +1930,7 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
// We allow fp literals with f16x2 operands assuming that the specified
// literal goes into the lower half and the upper half is zero. We also
- // require that the literal may be losslesly converted to f16.
+ // require that the literal may be losslessly converted to f16.
MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
(type == MVT::v2i16)? MVT::i16 :
(type == MVT::v2f32)? MVT::f32 : type;
@@ -2960,7 +2960,7 @@ AMDGPUAsmParser::isModifier() {
// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
-// handled likewise for unifomtity
+// handled likewise for uniformity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {
@@ -6342,7 +6342,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
using namespace llvm::AMDGPU::SendMsg;
// Validation strictness depends on whether message is specified
- // in a symbolc or in a numeric form. In the latter case
+ // in a symbolic or in a numeric form. In the latter case
// only encoding possibility is checked.
bool Strict = Msg.IsSymbolic;
@@ -8384,7 +8384,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
-// This fuction should be defined after auto-generated include so that we have
+// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a535c8cc09184..ccbafd02739c7 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -136,7 +136,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<3> nfmt = format{6-4};
// GFX90A+ only: instruction uses AccVGPR for data
- // Bit superceedes tfe.
+ // Bit supersedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
}
@@ -370,7 +370,7 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
bits<8> soffset;
// GFX90A+ only: instruction uses AccVGPR for data
- // Bit superceedes tfe.
+ // Bit supersedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c0592f6f3c7af..a1eb80b1b762b 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-// Hazard Recoginizer Implementation
+// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//
static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
@@ -534,7 +534,7 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
// In order to handle these situations correctly we need to make sure that
// when a clause has more than one instruction, no instruction in the clause
// writes to a register that is read by another instruction in the clause
- // (including itself). If we encounter this situaion, we need to break the
+ // (including itself). If we encounter this situation, we need to break the
// clause by inserting a non SMEM instruction.
for (MachineInstr *MI : EmittedInstrs) {
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 9f98f9ada802e..c39e47363d76f 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -1,4 +1,4 @@
-//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
-/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
+/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
/// with sequential versions where possible.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 257561cb8430f..c41548d19c8e7 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -10,7 +10,7 @@
/// This file defines the GCNRegPressure class, which tracks registry pressure
/// by bookkeeping number of SGPR/VGPRs used, weights for large SGPR/VGPRs. It
/// also implements a compare function, which compares different register
-/// pressures, and declares one with max occupance as winner.
+/// pressures, and declares one with max occupancy as winner.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e2d9e03260923..ef73ae237d615 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -695,7 +695,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
OS.emitBytes(VendorName);
OS.emitInt8(0); // NULL terminate VendorName
OS.emitBytes(ArchName);
- OS.emitInt8(0); // NULL terminte ArchName
+ OS.emitInt8(0); // NULL terminate ArchName
});
}
diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 715fd69fc7ae5..54ef6993cef9b 100644
--- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer.
+/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
/// This pass is merging consecutive CFAlus where applicable.
/// It needs to be called after IfCvt for best results.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index b9ca7f928d563..699df681f1fd5 100644
--- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -327,9 +327,9 @@ char R600EmitClauseMarkers::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
- "R600 Emit Clause Markters", false, false)
+ "R600 Emit Clause Markers", false, false)
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
- "R600 Emit Clause Markters", false, false)
+ "R600 Emit Clause Markers", false, false)
FunctionPass *llvm::createR600EmitClauseMarkers() {
return new R600EmitClauseMarkers();
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index bd757e9e3d704..06b8ec2dceb43 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -995,7 +995,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
-/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
+/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
unsigned StackWidth,
@@ -1100,7 +1100,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
DAG.getConstant(3, DL, MVT::i32));
- // TODO: Contrary to the name of the functiom,
+ // TODO: Contrary to the name of the function,
// it also handles sub i32 non-truncating stores (like i1)
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
Store->getValue());
@@ -1610,7 +1610,7 @@ static SDValue CompactSwizzlableVector(
if (NewBldVec[i].isUndef())
// We mask write here to teach later passes that the ith element of this
// vector is undef. Thus we can use it to reduce 128 bits reg usage,
- // break false dependencies and additionnaly make assembly easier to read.
+ // break false dependencies and additionally make assembly easier to read.
RemapSwizzle[i] = 7; // SEL_MASK_WRITE
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
if (C->isZero()) {
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index fbe2a1cd9fbac..59e2747875909 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -207,7 +207,7 @@ class R600PacketizerList : public VLIWPacketizerList {
return !ARDef || !ARUse;
}
- // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
+ // isLegalToPruneDependencies - Is it legal to prune dependency between SUI
// and SUJ.
bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
return false;
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
index 0ccbca3c68b14..d9f7bc118f90d 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// The AMDGPU TargetMachine interface definition for hw codgen targets.
+/// The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 80ee7a00252a1..d7ca7f36284bf 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -241,7 +241,7 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
}
// Check register def/use conflicts, occupancy limits and collect def/use maps.
-// Return true if instruction can be bundled with previous. It it cannot
+// Return true if instruction can be bundled with previous. If it cannot
// def/use maps are not updated.
bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
RegUse &Defs, RegUse &Uses,
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0169b752e9983..d12ff4a142128 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1200,7 +1200,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- // Stack slot coloring may assign different objets to the same stack slot.
+ // Stack slot coloring may assign different objects to the same stack slot.
// If not, then the VGPR to AGPR spill slot is dead.
for (unsigned FI : SpillFIs.set_bits())
if (!NonVGPRSpillFIs.test(FI))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0a02f64d2b034..cc4fc4800f0dc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1581,11 +1581,11 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
if (Subtarget->hasUnalignedBufferAccessEnabled() &&
!(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
- // If we have an uniform constant load, it still requires using a slow
+ // If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
// Accesses can really be issued as 1-byte aligned or 4-byte aligned, so
- // 2-byte alignment is worse than 1 unless doing a 2-byte accesss.
+ // 2-byte alignment is worse than 1 unless doing a 2-byte access.
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
Alignment >= Align(4) : Alignment != Align(2);
@@ -4565,7 +4565,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
// Otherwise f32 mad is always full rate and returns the same result as
// the separate operations so should be preferred over fma.
- // However does not support denomals.
+ // However does not support denormals.
if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
@@ -8425,7 +8425,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- // If there is a possibilty that flat instruction access scratch memory
+ // If there is a possibility that flat instruction access scratch memory
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
@@ -8513,7 +8513,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
- // SI has a hardware bug in the LDS / GDS boounds checking: if the base
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
// address is negative, then the instruction is incorrectly treated as
// out-of-bounds even if base + offsets is in bounds. Split vectorized
// loads here to avoid emitting ds_read2_b32. We may re-combine the
@@ -8975,7 +8975,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- // If there is a possibilty that flat instruction access scratch memory
+ // If there is a possibility that flat instruction access scratch memory
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
@@ -9024,7 +9024,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
- // SI has a hardware bug in the LDS / GDS boounds checking: if the base
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
// address is negative, then the instruction is incorrectly treated as
// out-of-bounds even if base + offsets is in bounds. Split vectorized
// stores here to avoid emitting ds_write2_b32. We may re-combine the
@@ -10064,7 +10064,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
}
}
- // If one half is undef, and one is constant, perfer a splat vector rather
+ // If one half is undef, and one is constant, prefer a splat vector rather
// than the normal qNaN. If it's a register, prefer 0.0 since that's
// cheaper to use and may be free with a packed operation.
if (NewElts[0].isUndef()) {
@@ -10786,7 +10786,7 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
// These should really be instruction patterns, but writing patterns with
- // source modiifiers is a pain.
+ // source modifiers is a pain.
// fadd (fadd (a, a), b) -> mad 2.0, a, b
if (LHS.getOpcode() == ISD::FADD) {
@@ -10883,8 +10883,8 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
return SDValue();
// fdot2_f32_f16 always flushes fp32 denormal operand and output to zero,
- // regardless of the denorm mode setting. Therefore, unsafe-fp-math/fp-contract
- // is sufficient to allow generaing fdot2.
+ // regardless of the denorm mode setting. Therefore,
+ // unsafe-fp-math/fp-contract is sufficient to allow generating fdot2.
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N->getFlags().hasAllowContract() &&
@@ -11585,7 +11585,7 @@ void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
if (DstSize < InitIdx)
return;
- // Create a register for the intialization value.
+ // Create a register for the initialization value.
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
unsigned NewDst = 0; // Final initialized value will be in here
@@ -11631,7 +11631,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
TII->legalizeOperandsVOP3(MRI, MI);
// Prefer VGPRs over AGPRs in mAI instructions where possible.
- // This saves a chain-copy of registers and better ballance register
+ // This saves a chain-copy of registers and better balance register
// use between vgpr and agpr as agpr tuples tend to be big.
if (MI.getDesc().OpInfo) {
unsigned Opc = MI.getOpcode();
@@ -12476,8 +12476,8 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
: AtomicExpansionKind::CmpXChg;
}
- // DS FP atomics do repect the denormal mode, but the rounding mode is fixed
- // to round-to-nearest-even.
+ // DS FP atomics do respect the denormal mode, but the rounding mode is
+ // fixed to round-to-nearest-even.
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) {
if (!Ty->isDoubleTy())
@@ -12523,7 +12523,7 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// always uniform.
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
unsigned WaveSize) {
- // FIXME: We asssume we never cast the mask results of a control flow
+ // FIXME: We assume we never cast the mask results of a control flow
// intrinsic.
// Early exit if the type won't be consistent as a compile time hack.
IntegerType *IT = dyn_cast<IntegerType>(V->getType());
@@ -12627,7 +12627,7 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const {
if (!N0.hasOneUse())
return false;
- // Take care of the oportunity to keep N0 uniform
+ // Take care of the opportunity to keep N0 uniform
if (N0->isDivergent() || !N1->isDivergent())
return true;
// Check if we have a good chance to form the memory access pattern with the
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 8508a3bfc5c20..d6ea4c0c06b88 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1040,7 +1040,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
// The function is going to insert a wait on everything in its prolog.
// This still needs to be careful if the call target is a load (e.g. a GOT
- // load). We also need to check WAW depenancy with saved PC.
+ // load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
int CallAddrOpIdx =
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7e5c9e990d4be..8dea17bcde1f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -203,7 +203,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Offset0Idx == -1 || Offset1Idx == -1)
return false;
- // XXX - be careful of datalesss loads
+ // XXX - be careful of dataless loads
// getNamedOperandIdx returns the index for MachineInstrs. Since they
// include the output in the operand list, but SDNodes don't, we need to
// subtract the index by one.
@@ -486,7 +486,7 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
return false;
}
- // In order to avoid regester pressure, on an average, the number of DWORDS
+ // In order to avoid register pressure, on an average, the number of DWORDS
// loaded together by all clustered mem ops should not exceed 8. This is an
// empirical value based on certain observations and performance related
// experiments.
@@ -2875,7 +2875,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
default:
return false;
case AMDGPU::S_MOV_B64:
- // TODO: We could fold 64-bit immediates, but this get compilicated
+ // TODO: We could fold 64-bit immediates, but this get complicated
// when there are sub-registers.
return false;
@@ -2955,7 +2955,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
// Multiplied part is the constant: Use v_madmk_{f16, f32}.
- // We should only expect these to be on src0 due to canonicalizations.
+ // We should only expect these to be on src0 due to canonicalization.
if (Src0->isReg() && Src0->getReg() == Reg) {
if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
return false;
@@ -4065,9 +4065,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
- const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
+ const int OpIndices[] = {DstIdx, Src0Idx, Src1Idx, Src2Idx};
- for (int OpIdx: OpIndicies) {
+ for (int OpIdx : OpIndices) {
if (OpIdx == -1)
continue;
const MachineOperand &MO = MI.getOperand(OpIdx);
@@ -4230,7 +4230,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
SGPRUsed = findImplicitSGPRRead(MI);
if (SGPRUsed != AMDGPU::NoRegister) {
- // Implicit uses may safely overlap true overands
+ // Implicit uses may safely overlap true operands
if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
return !RI.regsOverlap(SGPRUsed, SGPR);
})) {
@@ -4707,7 +4707,7 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
bool IsAllocatable = false;
if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) {
// vdst and vdata should be both VGPR or AGPR, same for the DS instructions
- // with two data operands. Request register class constainted to VGPR only
+ // with two data operands. Request register class constrained to VGPR only
// of both operands present as Machine Copy Propagation can not check this
// constraint and possibly other passes too.
//
@@ -5266,7 +5266,7 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
const MCInstrDesc &NewDesc = get(NewOpc);
Inst.setDesc(NewDesc);
- // Callers expect interator to be valid after this call, so modify the
+ // Callers expect iterator to be valid after this call, so modify the
// instruction in place.
if (OldVAddrIdx == NewVAddrIdx) {
MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx);
@@ -5275,7 +5275,7 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
MRI.moveOperands(&NewVAddr, &SAddr, 1);
Inst.RemoveOperand(OldSAddrIdx);
// Update the use list with the pointer we have just moved from vaddr to
- // saddr poisition. Otherwise new vaddr will be missing from the use list.
+ // saddr position. Otherwise new vaddr will be missing from the use list.
MRI.removeRegOperandFromUseList(&NewVAddr);
MRI.addRegOperandToUseList(&NewVAddr);
} else {
@@ -5432,7 +5432,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
else
Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2));
- // Combine the comparision results with AND.
+ // Combine the comparison results with AND.
if (CondReg == AMDGPU::NoRegister) // First.
CondReg = NewCondReg;
else { // If not the first, we create an AND.
@@ -5796,7 +5796,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
RI.getRegClass(RsrcRC))) {
// The operands are legal.
- // FIXME: We may need to legalize operands besided srsrc.
+ // FIXME: We may need to legalize operands besides srsrc.
return CreatedBB;
}
@@ -5870,7 +5870,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
- // Atomics rith return have have an additional tied operand and are
+ // Atomics with return have an additional tied operand and are
// missing some of the special bits.
MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
MachineInstr *Addr64;
@@ -6501,7 +6501,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
// Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
// invert either source and then perform the XOR. If either source is a
// scalar register, then we can leave the inversion on the scalar unit to
- // acheive a better distrubution of scalar and vector instructions.
+ // achieve a better distribution of scalar and vector instructions.
bool Src0IsSGPR = Src0.isReg() &&
RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
bool Src1IsSGPR = Src1.isReg() &&
@@ -6723,7 +6723,7 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
legalizeOperands(*LoHalf, MDT);
legalizeOperands(*HiHalf, MDT);
- // Move all users of this moved vlaue.
+ // Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
@@ -6787,7 +6787,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
Worklist.insert(&LoHalf);
Worklist.insert(&HiHalf);
- // Move all users of this moved vlaue.
+ // Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
@@ -6865,7 +6865,7 @@ void SIInstrInfo::splitScalar64BitBCNT(
MRI.replaceRegWith(Dest.getReg(), ResultReg);
- // We don't need to legalize operands here. src0 for etiher instruction can be
+ // We don't need to legalize operands here. src0 for either instruction can be
// an SGPR, and the second input is unused or determined here.
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
@@ -7079,7 +7079,7 @@ void SIInstrInfo::addSCCDefsToVALUWorklist(MachineOperand &Op,
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isUse());
MachineInstr *SCCUseInst = Op.getParent();
- // Look for a preceeding instruction that either defines VCC or SCC. If VCC
+ // Look for a preceding instruction that either defines VCC or SCC. If VCC
// then there is nothing to do because the defining instruction has been
// converted to a VALU already. If SCC then that instruction needs to be
// converted to a VALU.
@@ -8194,7 +8194,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
this](int64_t ExpectedValue, unsigned SrcSize,
- bool IsReversable, bool IsSigned) -> bool {
+ bool IsReversible, bool IsSigned) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
// s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
// s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -8252,7 +8252,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
bool IsReversedCC = false;
if (CmpValue != ExpectedValue) {
- if (!IsReversable)
+ if (!IsReversible)
return false;
IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
if (!IsReversedCC)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 73544048e79cc..58606843ac9de 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1966,7 +1966,7 @@ class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
- " vcc", // use vcc token as dst for VOPC instructioins
+ " vcc", // use vcc token as dst for VOPC instructions
"$vdst"),
"");
string src0 = "$src0_modifiers";
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c41087bbb9da8..fdc8f30c01b07 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1822,7 +1822,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
// from which (&a + 4096) has 13 bit distance. Both &a + 6144 and &a + 8192
// has 13bit distance from &a + 4096. The heuristic considers &a + 8192
// as the new-base(anchor) because of the maximum distance which can
- // accomodate more intermediate bases presumeably.
+ // accommodate more intermediate bases presumably.
//
// Step3: move (&a + 8192) above load1. Compute and promote offsets from
// (&a + 8192) for load1, load2, load4.
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 4bb05d9069780..66518fbbe4fbf 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -540,7 +540,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
return;
// Make sure we do not modify exec between def and use.
- // A copy with implcitly defined exec inserted earlier is an exclusion, it
+ // A copy with implicitly defined exec inserted earlier is an exclusion, it
// does not really modify exec.
for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
@@ -580,7 +580,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
}
void SILowerControlFlow::optimizeEndCf() {
- // If the only instruction immediately following this END_CF is an another
+ // If the only instruction immediately following this END_CF is another
// END_CF in the only successor we can avoid emitting exec mask restore here.
if (!EnableOptimizeEndCf)
return;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 1bb17a549cbf4..5d24d66c24caf 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -331,7 +331,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));
- // Add this register as live-in to all blocks to avoid machine verifer
+ // Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.
for (MachineBasicBlock &BB : MF)
BB.addLiveIn(LaneVGPR);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 81db66a98ddf8..5fc12faa62454 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -64,7 +64,7 @@ using namespace llvm;
// First the instructions are put into blocks.
// We want the blocks help control register usage and hide high latencies
// later. To help control register usage, we typically want all local
-// computations, when for example you create a result that can be comsummed
+// computations, when for example you create a result that can be consumed
// right away, to be contained in a block. Block inputs and outputs would
// typically be important results that are needed in several locations of
// the shader. Since we do want blocks to help hide high latencies, we want
@@ -90,8 +90,8 @@ using namespace llvm;
// Increasing the number of active wavefronts helps hide the former, but it
// doesn't solve the latter, thus why even if wavefront count is high, we have
// to try have as many instructions hiding high latencies as possible.
-// The OpenCL doc says for example latency of 400 cycles for a global mem access,
-// which is hidden by 10 instructions if the wavefront count is 10.
+// The OpenCL doc says for example latency of 400 cycles for a global mem
+// access, which is hidden by 10 instructions if the wavefront count is 10.
// Some figures taken from AMD docs:
// Both texture and constant L1 caches are 4-way associative with 64 bytes
@@ -353,7 +353,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
// able to correctly handle 5 vs 6, 2 vs 3.
// (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
// The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
- // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
+ // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
// The use of findDefBetween removes the case 4.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
Register Reg = RegMaskPair.RegUnit;
@@ -402,7 +402,7 @@ void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
nodeScheduled(SU);
}
- // TODO: compute InternalAdditionnalPressure.
+ // TODO: compute InternalAdditionalPressure.
InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
// Check everything is right.
@@ -696,7 +696,7 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() {
bool HasSubGraph;
std::vector<int> SubGraph;
// By construction (topological order), if SU and
- // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
+ // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
// in the parent graph of SU.
#ifndef NDEBUG
SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
@@ -1131,7 +1131,7 @@ void SIScheduleBlockCreator::colorExports() {
bool HasSubGraph;
std::vector<int> SubGraph;
// By construction (topological order), if SU and
- // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
+ // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
// in the parent graph of SU.
#ifndef NDEBUG
SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
@@ -1148,7 +1148,7 @@ void SIScheduleBlockCreator::colorExports() {
for (unsigned k : SubGraph) {
if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
// Other instructions than EXP would be required in the group.
- // Abort the groupping.
+ // Abort the grouping.
return;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index fff4f6729c99a..d1ce9680c0328 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -63,7 +63,7 @@ enum class SIAtomicScope {
};
/// The distinct address spaces supported by the AMDGPU target for
-/// atomic memory operation. Can be ORed toether.
+/// atomic memory operation. Can be ORed together.
enum class SIAtomicAddrSpace {
NONE = 0u,
GLOBAL = 1u << 0,
@@ -943,7 +943,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
// The LDS keeps all memory operations in order for
- // the same wavesfront.
+ // the same wavefront.
break;
default:
llvm_unreachable("Unsupported synchronization scope");
@@ -1547,7 +1547,7 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
// The LDS keeps all memory operations in order for
- // the same wavesfront.
+ // the same wavefront.
break;
default:
llvm_unreachable("Unsupported synchronization scope");
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 5f89f38266833..12e6969be34b6 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -402,7 +402,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
}
// If the only user of a logical operation is move to exec, fold it now
- // to prevent forming of saveexec. I.e:
+ // to prevent forming of saveexec. I.e.:
//
// %0:sreg_64 = COPY $exec
// %1:sreg_64 = S_AND_B64 %0:sreg_64, %2:sreg_64
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index da41a5e2478a0..e768a2f3e1a5d 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -316,7 +316,7 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
}
if (Abs || Neg) {
assert(!Sext &&
- "Float and integer src modifiers can't be set simulteniously");
+ "Float and integer src modifiers can't be set simultaneously");
Mods |= Abs ? SISrcMods::ABS : 0u;
Mods ^= Neg ? SISrcMods::NEG : 0u;
} else if (Sext) {
@@ -1131,16 +1131,16 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
bool Converted = false;
for (auto &Operand : SDWAOperands) {
LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
- // There should be no intesection between SDWA operands and potential MIs
+ // There should be no intersection between SDWA operands and potential MIs
// e.g.:
// v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
// v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
// v_add_u32 v3, v4, v2
//
- // In that example it is possible that we would fold 2nd instruction into 3rd
- // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was
- // already destroyed). So if SDWAOperand is also a potential MI then do not
- // apply it.
+ // In that example it is possible that we would fold 2nd instruction into
+ // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that
+ // was already destroyed). So if SDWAOperand is also a potential MI then do
+ // not apply it.
if (PotentialMatches.count(Operand->getParentInst()) == 0)
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index b0e45dd3e3e3a..41b9b0a939e7d 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -133,7 +133,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
return Changed;
MaskValue = M->getOperand(1).getImm();
// First if sreg is only used in the AND instruction fold the immediate
- // into into the AND.
+ // into the AND.
if (!ReadsSreg && Op2.isKill()) {
A->getOperand(2).ChangeToImmediate(MaskValue);
M->eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 71b8b779ba76d..4d6557c23b720 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -97,7 +97,7 @@ class RegSeqNames<int last_reg, int stride, int size, string prefix,
[]);
}
-// Generates list of dags for register tupless.
+// Generates list of dags for register tuples.
class RegSeqDags<RegisterClass RC, int last_reg, int stride, int size,
int start = 0> {
dag trunc_rc = (trunc RC,
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index a15686c637cd8..07b1c42a9b2fc 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -458,11 +458,11 @@ static void dropInstructionKeepingImpDefs(MachineInstr &MI,
// Returns next valid instruction pointer if was able to create v_swap_b32.
//
// This shall not be done too early not to prevent possible folding which may
-// remove matched moves, and this should prefereably be done before RA to
+// remove matched moves, and this should preferably be done before RA to
// release saved registers and also possibly after RA which can insert copies
// too.
//
-// This is really just a generic peephole that is not a canocical shrinking,
+// This is really just a generic peephole that is not a canonical shrinking,
// although requirements match the pass placement and it reduces code size too.
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
const SIInstrInfo *TII) {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 46efb3c605c69..720fc213f77cd 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -969,7 +969,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
MachineInstr *WQMMaskMI = nullptr;
Register LiveMaskWQM;
if (IsDemote) {
- // Demotes deactive quads with only helper lanes
+ // Demote - deactivate quads with only helper lanes
LiveMaskWQM = MRI->createVirtualRegister(TRI->getBoolRC());
WQMMaskMI =
BuildMI(MBB, MI, DL, TII->get(WQMOpc), LiveMaskWQM).addReg(LiveMaskReg);
@@ -977,7 +977,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
.addReg(Exec)
.addReg(LiveMaskWQM);
} else {
- // Kills deactivate lanes
+ // Kill - deactivate lanes no longer in live mask
if (Op.isImm()) {
unsigned MovOpc = ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
NewTerm = BuildMI(MBB, &MI, DL, TII->get(MovOpc), Exec).addImm(0);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 7df0eab964e62..05cd3deac4acd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -935,7 +935,7 @@ inline bool isLegal64BitDPPControl(unsigned DC) {
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
-// Track defaults for fields in the MODE registser.
+// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
/// Floating point opcodes that support exception flag gathering quiet and
/// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index 292500a8b77e8..65ed02ca62de8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -35,8 +35,8 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M,
/// Replace all uses of constant \p C with instructions in \p F.
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
-/// Given a \p Def clobbering a load from \p Ptr accroding to the MSSA check
-/// if this is actually a memory update or an artifical clobber to facilitate
+/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
+/// if this is actually a memory update or an artificial clobber to facilitate
/// ordering constraints.
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA);
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 9b998404faa9e..8a3548cd89f21 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -390,7 +390,7 @@ class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
// Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
// We then create two versions of the instruction: with tied dst and src2
- // and with the eralyclobber flag on the dst. This is strciter than the
+ // and with the earlyclobber flag on the dst. This is stricter than the
// actual HW restriction. In particular earlyclobber also affects src0 and
// src1 allocation which is not required.
bit NoDstOverlap = !gt(DstVT.Size, 128);