[llvm] b27d255 - AMDGPU/GlobalISel: Form CVT_F32_UBYTE0
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 14:52:19 PDT 2020
Author: Matt Arsenault
Date: 2020-03-30T17:45:55-04:00
New Revision: b27d255e1e40bc065e68e39c6e1eaf3a5a16f005
URL: https://github.com/llvm/llvm-project/commit/b27d255e1e40bc065e68e39c6e1eaf3a5a16f005
DIFF: https://github.com/llvm/llvm-project/commit/b27d255e1e40bc065e68e39c6e1eaf3a5a16f005.diff
LOG: AMDGPU/GlobalISel: Form CVT_F32_UBYTE0
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCombine.td
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 4063bb1fca2c..ff8a31dee337 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -20,6 +20,13 @@ def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
(apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
+def uchar_to_float : GICombineRule<
+ (defs root:$itofp),
+ (match (wip_match_opcode G_UITOFP, G_SITOFP):$itofp,
+ [{ return matchUCharToFloat(*${itofp}, MRI, *MF, Helper); }]),
+ (apply [{ applyUCharToFloat(*${itofp}); }])>;
+
+
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
@@ -32,6 +39,6 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper", [all_combines,
- gfx6gfx7_combines]> {
+ gfx6gfx7_combines, uchar_to_float]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index faf007f7c639..3d6a2c707765 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -153,6 +153,11 @@ def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
def : GINodeEquiv<G_AMDGPU_RCP_IFLAG, AMDGPUrcp_iflag>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE0, AMDGPUcvt_f32_ubyte0>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE1, AMDGPUcvt_f32_ubyte1>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
+
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 35299b6dec62..7fe5e484db0b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -127,6 +127,43 @@ static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
MI.eraseFromParent();
}
+static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineFunction &MF, CombinerHelper &Helper) {
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // TODO: We could try to match extracting the higher bytes, which would be
+ // easier if i8 vectors weren't promoted to i32 vectors, particularly after
+ // types are legalized. v4i8 -> v4f32 is probably the only case to worry
+ // about in practice.
+ LLT Ty = MRI.getType(DstReg);
+ if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
+ const APInt Mask = APInt::getHighBitsSet(32, 24);
+ return Helper.getKnownBits()->maskedValueIsZero(MI.getOperand(1).getReg(),
+ Mask);
+ }
+
+ return false;
+}
+
+static void applyUCharToFloat(MachineInstr &MI) {
+ MachineIRBuilder B(MI);
+
+ const LLT S32 = LLT::scalar(32);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = B.getMRI()->getType(DstReg);
+
+ if (Ty == S32) {
+ B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
+ {MI.getOperand(1)}, MI.getFlags());
+ } else {
+ auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
+ {MI.getOperand(1)}, MI.getFlags());
+ B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
+ }
+
+ MI.eraseFromParent();
+}
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7a49f76f7056..8070bcb2905b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3293,6 +3293,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 030baa3263c1..6e6b2e77b0fe 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2297,6 +2297,14 @@ def G_AMDGPU_FMAX_LEGACY : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
+foreach N = 0-3 in {
+def G_AMDGPU_CVT_F32_UBYTE#N : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0);
+ let hasSideEffects = 0;
+}
+}
+
// Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
// operand Expects a MachineMemOperand in addition to explicit
// operands.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir
new file mode 100644
index 000000000000..2021107b8448
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir
@@ -0,0 +1,175 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: uitofp_char_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: uitofp_char_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_UITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: uitofp_too_many_bits_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: uitofp_too_many_bits_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32)
+ ; CHECK: $vgpr0 = COPY [[UITOFP]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 256
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_UITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: sitofp_char_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_char_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: sitofp_bits127_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_bits127_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 127
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: sitofp_bits128_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_bits128_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 128
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+---
+name: sitofp_too_many_bits_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_too_many_bits_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[AND]](s32)
+ ; CHECK: $vgpr0 = COPY [[SITOFP]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 256
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: uitofp_char_to_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: uitofp_char_to_f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s16) = G_UITOFP %2
+ %4:_(s32) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+...
+
+---
+name: sitofp_char_to_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_char_to_f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s16) = G_SITOFP %2
+ %4:_(s32) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+...
More information about the llvm-commits
mailing list