[llvm] d722924 - Added comments.

Thomas Symalla via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 00:15:59 PST 2021


Author: Thomas Symalla
Date: 2021-02-02T09:14:52+01:00
New Revision: d722924f20918fd29d342cb2be50393d0fa9347f

URL: https://github.com/llvm/llvm-project/commit/d722924f20918fd29d342cb2be50393d0fa9347f
DIFF: https://github.com/llvm/llvm-project/commit/d722924f20918fd29d342cb2be50393d0fa9347f.diff

LOG: Added comments.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
    llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 12a1d3854e37..189272a3eb61 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -491,11 +491,6 @@ m_Not(const SrcTy &&Src) {
   return m_GXor(Src, m_AllOnesInt());
 }
 
-// class that allows to match one of the following patterns:
-//    select (pred, x, value1) -> cmp slt -> select (pred, origin, value2) ->
-//    cmp sgt OR select (pred, x, value1) -> cmp sgt -> select (pred, origin,
-//    value2) -> cmp slt
-// also binds the boundary values and the origin.
 template <typename Boundary1, typename Boundary2, typename Origin>
 struct MaxMin_match_helper {
   Boundary1 B1;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 03375a4c89bd..2c7ddd4d0033 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -273,6 +273,9 @@ bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
     ClampI64ToI16MatchInfo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
   const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
+
+  // We want to check if a 64-bit number gets clamped to 16-bit boundaries (or
+  // below).
   if (SrcType != LLT::scalar(64))
     return false;
 
@@ -283,13 +286,11 @@ bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
   if (mi_match(MI.getOperand(1).getReg(), MRI,
                m_MaxMin(m_ICst(MatchInfo.Cmp1), m_ICst(MatchInfo.Cmp2),
                         m_Reg(MatchInfo.Origin)))) {
-    const auto Cmp1 = static_cast<int64_t>(MatchInfo.Cmp1);
-    const auto Cmp2 = static_cast<int64_t>(MatchInfo.Cmp2);
+    const auto Cmp1 = MatchInfo.Cmp1;
+    const auto Cmp2 = MatchInfo.Cmp2;
 
-    const int64_t Min =
-        static_cast<int64_t>(std::numeric_limits<int16_t>::min());
-    const int64_t Max =
-        static_cast<int64_t>(std::numeric_limits<int16_t>::max());
+    const int64_t Min = std::numeric_limits<int16_t>::min();
+    const int64_t Max = std::numeric_limits<int16_t>::max();
 
     // are we really trying to clamp against short boundaries?
     return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
@@ -299,6 +300,15 @@ bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
   return false;
 }
 
+/**
+ * We want to find a combination of instructions that
+ * gets generated when an i64 gets clamped to i16.
+ * The corresponding pattern is:
+ * G_SELECT MIN/MAX, G_ICMP, G_SELECT MIN/MAX, G_ICMP, G_TRUNC.
+ * This can be efficiently written as follows:
+ * v_cvt_pk_i16_i32 v0, v0, v1
+ * v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
+ */
 void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
     MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
   LLVM_DEBUG(dbgs() << "Combining MI");
@@ -319,8 +329,7 @@ void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
   constexpr unsigned int CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64;
   assert(MI.getOpcode() != CvtOpcode);
 
-  Register CvtDst = MRI.createGenericVirtualRegister(S32);
-  MRI.setRegClass(CvtDst, &AMDGPU::VGPR_32RegClass);
+  Register CvtDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
   auto CvtPk = B.buildInstr(CvtOpcode);
   CvtPk.addDef(CvtDst);
@@ -331,16 +340,13 @@ void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
   auto min = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
   auto max = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
 
-  Register MinBoundaryDst = MRI.createGenericVirtualRegister(S32);
-  MRI.setRegClass(MinBoundaryDst, &AMDGPU::VGPR_32RegClass);
+  Register MinBoundaryDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   B.buildConstant(MinBoundaryDst, min);
 
-  Register MaxBoundaryDst = MRI.createGenericVirtualRegister(S32);
-  MRI.setRegClass(MaxBoundaryDst, &AMDGPU::VGPR_32RegClass);
+  Register MaxBoundaryDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   B.buildConstant(MaxBoundaryDst, max);
 
-  Register MedDst = MRI.createGenericVirtualRegister(S32);
-  MRI.setRegClass(MedDst, &AMDGPU::VGPR_32RegClass);
+  Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
   auto Med = B.buildInstr(AMDGPU::V_MED3_I32);
   Med.addDef(MedDst);


        


More information about the llvm-commits mailing list