[llvm] 08db696 - [AMDGPU][True16][MC] V_MED3_I/U16_fake16 CodeGen pattern (#120600)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 19 19:54:01 PST 2024
Author: Brox Chen
Date: 2024-12-20T10:53:58+07:00
New Revision: 08db696c87fb68cc896f742202665440cee53f8c
URL: https://github.com/llvm/llvm-project/commit/08db696c87fb68cc896f742202665440cee53f8c
DIFF: https://github.com/llvm/llvm-project/commit/08db696c87fb68cc896f742202665440cee53f8c.diff
LOG: [AMDGPU][True16][MC] V_MED3_I/U16_fake16 CodeGen pattern (#120600)
In this patch https://github.com/llvm/llvm-project/pull/113603 replace
`V_MED3_I/U16` to `V_MED3_I/U16_fake16` for Post-GFX11, but it miss to
update the CodeGen pattern. This patch update and corrert the CodeGen
pattern
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b1f93a447a7b82..789ce8815cf801 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3645,13 +3645,15 @@ multiclass FPMed3Pat<ValueType vt,
multiclass Int16Med3Pat<Instruction med3Inst,
SDPatternOperator min,
- SDPatternOperator max> {
+ SDPatternOperator max,
+ RegisterOperand outputSrcType> {
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
def : GCNPat <
(max (min i16:$src0, i16:$src1),
(min (max i16:$src0, i16:$src1), i16:$src2)),
- (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
+ (med3Inst SRCMODS.NONE, outputSrcType:$src0, SRCMODS.NONE, outputSrcType:$src1,
+ SRCMODS.NONE, outputSrcType:$src2, DSTCLAMP.NONE)
>;
// This matches 16 permutations of
@@ -3716,10 +3718,16 @@ def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>
def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
}
-let OtherPredicates = [isGFX9Plus] in {
-defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax>;
-defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
-} // End Predicates = [isGFX9Plus]
+let SubtargetPredicate = isGFX9Plus in {
+let True16Predicate = NotHasTrue16BitInsts in {
+ defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
+ defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
+}
+let True16Predicate = UseFakeTrue16Insts in {
+ defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
+ defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;
+}
+} // End SubtargetPredicate = [isGFX9Plus]
let OtherPredicates = [isGFX12Plus] in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
index 9dc53bd1dc0bdd..22dd12eac0923e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
@@ -24,6 +24,7 @@ body: |
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]]
+ ;
; GFX9-LABEL: name: smed3_s16_vvv
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -32,14 +33,15 @@ body: |
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
+ ;
; GFX11-LABEL: name: smed3_s16_vvv
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -75,6 +77,7 @@ body: |
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]]
+ ;
; GFX9-LABEL: name: smed3_s16_vvv_multiuse0
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -84,6 +87,7 @@ body: |
; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
+ ;
; GFX11-LABEL: name: smed3_s16_vvv_multiuse0
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -91,8 +95,8 @@ body: |
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -128,6 +132,7 @@ body: |
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]]
+ ;
; GFX9-LABEL: name: smed3_s16_vvv_multiuse1
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -137,6 +142,7 @@ body: |
; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]]
+ ;
; GFX11-LABEL: name: smed3_s16_vvv_multiuse1
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -144,8 +150,8 @@ body: |
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_fake16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MIN_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -181,6 +187,7 @@ body: |
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]]
+ ;
; GFX9-LABEL: name: smed3_s16_vvv_multiuse2
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -191,6 +198,7 @@ body: |
; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
+ ;
; GFX11-LABEL: name: smed3_s16_vvv_multiuse2
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -199,8 +207,8 @@ body: |
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[V_MIN_I16_fake16_e64_]], [[COPY2]], implicit $exec
- ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
index 6928c963a5fcf9..6e1489e3227d96 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
@@ -24,6 +24,7 @@ body: |
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]]
+ ;
; GFX9-LABEL: name: umed3_s16_vvv
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -32,14 +33,15 @@ body: |
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
+ ;
; GFX11-LABEL: name: umed3_s16_vvv
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -75,6 +77,7 @@ body: |
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]]
+ ;
; GFX9-LABEL: name: umed3_s16_vvv_multiuse0
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -84,6 +87,7 @@ body: |
; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
+ ;
; GFX11-LABEL: name: umed3_s16_vvv_multiuse0
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -91,8 +95,8 @@ body: |
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX11-NEXT: [[V_MAX_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -128,6 +132,7 @@ body: |
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]]
+ ;
; GFX9-LABEL: name: umed3_s16_vvv_multiuse1
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -137,6 +142,7 @@ body: |
; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]]
+ ;
; GFX11-LABEL: name: umed3_s16_vvv_multiuse1
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -144,8 +150,8 @@ body: |
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX11-NEXT: [[V_MIN_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_fake16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MIN_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -181,6 +187,7 @@ body: |
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]]
+ ;
; GFX9-LABEL: name: umed3_s16_vvv_multiuse2
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -191,6 +198,7 @@ body: |
; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
+ ;
; GFX11-LABEL: name: umed3_s16_vvv_multiuse2
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -199,8 +207,8 @@ body: |
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX11-NEXT: [[V_MIN_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: [[V_MAX_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_fake16_e64 [[V_MIN_U16_fake16_e64_]], [[COPY2]], implicit $exec
- ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
+ ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
More information about the llvm-commits
mailing list