[llvm] d49efdc - Revert "[AMDGPU] Add a new Clamp Pattern to the GlobalISel Path."
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 3 02:07:50 PST 2021
Author: Sebastian Neubauer
Date: 2021-02-03T11:03:34+01:00
New Revision: d49efdc9696afee4b972c54bc3678b28c5700047
URL: https://github.com/llvm/llvm-project/commit/d49efdc9696afee4b972c54bc3678b28c5700047
DIFF: https://github.com/llvm/llvm-project/commit/d49efdc9696afee4b972c54bc3678b28c5700047.diff
LOG: Revert "[AMDGPU] Add a new Clamp Pattern to the GlobalISel Path."
This reverts commits 62af0305b7cc..677a3529d3e6 from D93708.
They cause failures in the sanitizer builds because of uninitialized
values.
A fix is in D95878, but it might take some time until this is pushed,
so reverting the changes for now.
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
llvm/lib/Target/AMDGPU/AMDGPUCombine.td
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index e1f273ff71db..55d6d365fbb4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -306,18 +306,6 @@ m_GAShr(const LHS &L, const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>(L, R);
}
-template <typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SMAX, false>
-m_GSMax(const LHS &L, const RHS &R) {
- return BinaryOp_match<LHS, RHS, TargetOpcode::G_SMAX, false>(L, R);
-}
-
-template <typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SMIN, false>
-m_GSMin(const LHS &L, const RHS &R) {
- return BinaryOp_match<LHS, RHS, TargetOpcode::G_SMIN, false>(L, R);
-}
-
// Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc
template <typename SrcTy, unsigned Opcode> struct UnaryOp_match {
SrcTy L;
@@ -480,13 +468,6 @@ m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) {
TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2);
}
-template <typename Src0Ty, typename Src1Ty, typename Src2Ty>
-inline TernaryOp_match<Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_SELECT>
-m_GISelect(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) {
- return TernaryOp_match<Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_SELECT>(
- Src0, Src1, Src2);
-}
-
/// Matches a register negated by a G_SUB.
/// G_SUB 0, %negated_reg
template <typename SrcTy>
@@ -503,7 +484,7 @@ m_Not(const SrcTy &&Src) {
return m_GXor(Src, m_AllOnesInt());
}
-} // namespace MIPatternMatch
+} // namespace GMIPatternMatch
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b6a6fb3e77db..a8399176bb4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -37,21 +37,13 @@ def cvt_f32_ubyteN : GICombineRule<
[{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]),
(apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
-def clamp_i64_to_i16_matchdata : GIDefMatchData<"AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo">;
-
-def clamp_i64_to_i16 : GICombineRule<
- (defs root:$clamp_i64_to_i16, clamp_i64_to_i16_matchdata:$matchinfo),
- (match (wip_match_opcode G_TRUNC):$clamp_i64_to_i16,
- [{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
- (apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
-
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
+
def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, clamp_i64_to_i16]> {
+ "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> {
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
- let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
}
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 76406f318490..bba03736d01a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -174,9 +174,6 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE1, AMDGPUcvt_f32_ubyte1>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
-def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
-def : GINodeEquiv<G_AMDGPU_MED3, AMDGPUsmed3>;
-
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index c0cb1781abe3..894677ec68b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -213,8 +213,6 @@ def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
SDTIntToFPOp, []>;
-def AMDGPUcvt_pk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32",
- AMDGPUIntPackOp, []>;
// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index e018628ae8cc..e4b628bf6b23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -12,9 +12,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPULegalizerInfo.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -29,134 +26,6 @@
using namespace llvm;
using namespace MIPatternMatch;
-class AMDGPUPreLegalizerCombinerHelper {
-protected:
- MachineIRBuilder &B;
- MachineFunction &MF;
- MachineRegisterInfo &MRI;
- CombinerHelper &Helper;
-
-public:
- AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
- : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
-
- struct ClampI64ToI16MatchInfo {
- int64_t Cmp1;
- int64_t Cmp2;
- Register Origin;
- };
-
- bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF,
- ClampI64ToI16MatchInfo &MatchInfo);
-
- void applyClampI64ToI16(MachineInstr &MI,
- const ClampI64ToI16MatchInfo &MatchInfo);
-};
-
-bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
- MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
- ClampI64ToI16MatchInfo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
-
- // Try to find a pattern where an i64 value should get clamped to short.
- const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
- if (SrcType != LLT::scalar(64))
- return false;
-
- const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
- if (DstType != LLT::scalar(16))
- return false;
-
- Register Base;
-
- // Try to match a combination of min / max MIR opcodes.
- if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
- if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
- return false;
- }
- }
-
- if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
- if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
- return false;
- }
- }
-
- const auto Cmp1 = MatchInfo.Cmp1;
- const auto Cmp2 = MatchInfo.Cmp2;
- const auto Diff = std::abs(Cmp2 - Cmp1);
-
- // If the difference between both comparison values is 0 or 1, there is no
- // need to clamp.
- if (Diff == 0 || Diff == 1)
- return false;
-
- const int64_t Min = std::numeric_limits<int16_t>::min();
- const int64_t Max = std::numeric_limits<int16_t>::max();
-
- // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
- return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
- (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
-}
-
-// We want to find a combination of instructions that
-// gets generated when an i64 gets clamped to i16.
-// The corresponding pattern is:
-// G_MAX / G_MAX for i16 <= G_TRUNC i64.
-// This can be efficiently written as following:
-// v_cvt_pk_i16_i32 v0, v0, v1
-// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
-void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
- MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
-
- Register Src = MatchInfo.Origin;
- assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
- LLT::scalar(64));
- const LLT S32 = LLT::scalar(32);
-
- B.setMBB(*MI.getParent());
- B.setInstrAndDebugLoc(MI);
-
- auto Unmerge = B.buildUnmerge(S32, Src);
-
- assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
-
- const LLT V2S16 = LLT::vector(2, 16);
- auto CvtPk = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32,
- {V2S16},
- {Unmerge.getReg(0), Unmerge.getReg(1)},
- MI.getFlags());
-
- auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
- auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
- auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
- auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
-
- auto Bitcast = B.buildBitcast({S32}, CvtPk);
-
- auto Med3 = B.buildInstr(AMDGPU::G_AMDGPU_MED3,
- {S32},
- {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
- MI.getFlags());
-
- B.buildTrunc(MI.getOperand(0).getReg(), Med3);
-
- MI.eraseFromParent();
-}
-
-class AMDGPUPreLegalizerCombinerHelperState {
-protected:
- CombinerHelper &Helper;
- AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
-
-public:
- AMDGPUPreLegalizerCombinerHelperState(
- CombinerHelper &Helper,
- AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
- : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
-};
-
#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
@@ -190,9 +59,7 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
- AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
- AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
- PreLegalizerHelper);
+ AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg);
if (Generated.tryCombineAll(Observer, MI, B, Helper))
return true;
@@ -258,7 +125,6 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
const Function &F = MF.getFunction();
bool EnableOpt =
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
-
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c9cca1e1beb8..502356d4f9a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3621,8 +3621,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
- case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
- case AMDGPU::G_AMDGPU_MED3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 92c0d196de22..ecb875debefd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2575,18 +2575,6 @@ def G_AMDGPU_CVT_F32_UBYTE#N : AMDGPUGenericInstruction {
}
}
-def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src0, type0:$src1);
- let hasSideEffects = 0;
-}
-
-def G_AMDGPU_MED3 : AMDGPUGenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
- let hasSideEffects = 0;
-}
-
// Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
// operand Expects a MachineMemOperand in addition to explicit
// operands.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
deleted file mode 100644
index 7d74c60a9e49..000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX678,GFX6789 %s
-; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9,GFX6789 %s
-; RUN: llc -global-isel -mcpu=gfx1010 -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
-
-declare i64 @llvm.smax.i64(i64, i64)
-declare i64 @llvm.smin.i64(i64, i64)
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16
-; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
-; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
-; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX10: v_mov_b32_e32 [[B]], 0x7fff
-; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
-define i16 @v_clamp_i64_i16(i64 %in) #0 {
-entry:
- %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768)
- %min = call i64 @llvm.smin.i64(i64 %max, i64 32767)
- %result = trunc i64 %min to i16
- ret i16 %result
-}
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_reverse
-; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
-; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
-; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX10: v_mov_b32_e32 [[B]], 0x7fff
-; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
-define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 {
-entry:
- %min = call i64 @llvm.smin.i64(i64 %in, i64 32767)
- %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
- %result = trunc i64 %max to i16
- ret i16 %result
-}
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_invalid_lower
-; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
-; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
-; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc
-
-; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8001, [[A]], vcc_lo
-; GFX10: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo
-define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 {
-entry:
- %min = call i64 @llvm.smin.i64(i64 %in, i64 32769)
- %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
- %result = trunc i64 %max to i16
- ret i16 %result
-}
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher
-; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
-; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
-; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
-define i16 @v_clamp_i64_i16_invalid_lower_and_higher(i64 %in) #0 {
-entry:
- %max = call i64 @llvm.smax.i64(i64 %in, i64 -32769)
- %min = call i64 @llvm.smin.i64(i64 %max, i64 32768)
- %result = trunc i64 %min to i16
- ret i16 %result
-}
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short
-; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
-; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
-; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX10: v_mov_b32_e32 [[B]], 0x100
-; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
-define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 {
-entry:
- %min = call i64 @llvm.smin.i64(i64 %in, i64 256)
- %max = call i64 @llvm.smax.i64(i64 %min, i64 -255)
- %result = trunc i64 %max to i16
- ret i16 %result
-}
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse
-; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
-; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
-; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
-; GFX10: v_mov_b32_e32 [[B]], 0x100
-; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
-define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 {
-entry:
- %max = call i64 @llvm.smax.i64(i64 %in, i64 -255)
- %min = call i64 @llvm.smin.i64(i64 %max, i64 256)
- %result = trunc i64 %min to i16
- ret i16 %result
-}
-
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_zero
-; GFX6789: v_mov_b32_e32 v0, 0
-; GFX10: v_mov_b32_e32 v0, 0
-define i16 @v_clamp_i64_i16_zero(i64 %in) #0 {
-entry:
- %max = call i64 @llvm.smax.i64(i64 %in, i64 0)
- %min = call i64 @llvm.smin.i64(i64 %max, i64 0)
- %result = trunc i64 %min to i16
- ret i16 %result
-}
More information about the llvm-commits
mailing list