[llvm] c73df56 - AMDGPU/GlobalISel: Address some test fixmes that don't fail now
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 18 07:57:32 PDT 2020
Author: Matt Arsenault
Date: 2020-07-18T10:54:39-04:00
New Revision: c73df5696696327a15af2f05b30923cd66361ddc
URL: https://github.com/llvm/llvm-project/commit/c73df5696696327a15af2f05b30923cd66361ddc
DIFF: https://github.com/llvm/llvm-project/commit/c73df5696696327a15af2f05b30923cd66361ddc.diff
LOG: AMDGPU/GlobalISel: Address some test fixmes that don't fail now
Added:
Modified:
llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
index ff0de0d1f609..c815220ef97d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
@@ -1,294 +1,182 @@
-; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
; Make sure we don't violate the constant bus restriction
-; FIXME: Make this test isa output when div.fmas works.
-
define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) {
- ; GFX9-LABEL: name: fmul_s_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]]
- ; GFX9: $vgpr0 = COPY [[FMUL]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fmul_s_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]]
- ; GFX10: $vgpr0 = COPY [[FMUL]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fmul_s_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_mul_f32_e32 v0, s2, v0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fmul_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mul_f32_e64 v0, s2, s3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%result = fmul float %src0, %src1
ret float %result
}
define amdgpu_ps float @fmul_ss(float inreg %src) {
- ; GFX9-LABEL: name: fmul_ss
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]]
- ; GFX9: $vgpr0 = COPY [[FMUL]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fmul_ss
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]]
- ; GFX10: $vgpr0 = COPY [[FMUL]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fmul_ss:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mul_f32_e64 v0, s2, s2
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fmul_ss:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mul_f32_e64 v0, s2, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%result = fmul float %src, %src
ret float %result
}
; Ternary operation with 3 different SGPRs
define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) {
- ; GFX9-LABEL: name: fma_s_s_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
- ; GFX9: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fma_s_s_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
- ; GFX10: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fma_s_s_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_fma_f32 v0, s2, v0, v1
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fma_s_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_fma_f32 v0, s3, s2, v0
+; GFX10-NEXT: ; return to shader part epilog
%result = call float @llvm.fma.f32(float %src0, float %src1, float %src2)
ret float %result
}
; Ternary operation with 3 identical SGPRs
define amdgpu_ps float @fma_sss(float inreg %src) {
- ; GFX9-LABEL: name: fma_sss
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]]
- ; GFX9: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fma_sss
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]]
- ; GFX10: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fma_sss:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_fma_f32 v0, s2, s2, s2
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fma_sss:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_fma_f32 v0, s2, s2, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%result = call float @llvm.fma.f32(float %src, float %src, float %src)
ret float %result
}
; src0/1 are same SGPR
define amdgpu_ps float @fma_ss_s(float inreg %src01, float inreg %src2) {
- ; GFX9-LABEL: name: fma_ss_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
- ; GFX9: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fma_ss_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
- ; GFX10: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fma_ss_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_fma_f32 v0, s2, s2, v0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fma_ss_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_fma_f32 v0, s2, s2, s3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%result = call float @llvm.fma.f32(float %src01, float %src01, float %src2)
ret float %result
}
; src1/2 are same SGPR
define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) {
- ; GFX9-LABEL: name: fma_s_ss
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
- ; GFX9: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fma_s_ss
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
- ; GFX10: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fma_s_ss:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_fma_f32 v0, s2, v0, v0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fma_s_ss:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_fma_f32 v0, s2, s3, s3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%result = call float @llvm.fma.f32(float %src0, float %src12, float %src12)
ret float %result
}
; src0/2 are same SGPR
define amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) {
- ; GFX9-LABEL: name: fma_ss_s_same_outer
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
- ; GFX9: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fma_ss_s_same_outer
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
- ; GFX10: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fma_ss_s_same_outer:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_fma_f32 v0, s2, v0, s2
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fma_ss_s_same_outer:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_fma_f32 v0, s2, s3, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%result = call float @llvm.fma.f32(float %src02, float %src1, float %src02)
ret float %result
}
define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
- ; GFX9-LABEL: name: fcmp_s_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
- ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
- ; GFX9: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: fcmp_s_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
- ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
- ; GFX10: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: fcmp_s_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s2, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: fcmp_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_eq_f32_e64 s0, s2, s3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
+; GFX10-NEXT: ; return to shader part epilog
%cmp = fcmp oeq float %src0, %src1
%result = select i1 %cmp, float 1.0, float 0.0
ret float %result
}
define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
- ; GFX9-LABEL: name: select_vcc_s_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
- ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
- ; GFX9: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: select_vcc_s_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
- ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
- ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
- ; GFX10: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: select_vcc_s_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: select_vcc_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v2, s3
+; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, s2, vcc_lo
+; GFX10-NEXT: ; return to shader part epilog
%cmp = fcmp oeq float %cmp0, %cmp1
%result = select i1 %cmp, float %src0, float %src1
ret float %result
}
define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
- ; GFX9-LABEL: name: select_vcc_fneg_s_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
- ; GFX9: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]]
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
- ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
- ; GFX9: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: select_vcc_fneg_s_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
- ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
- ; GFX10: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]]
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32)
- ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
- ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
- ; GFX10: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: select_vcc_fneg_s_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v2, s3
+; GFX9-NEXT: v_mov_b32_e32 v3, s2
+; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -v3, vcc
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: select_vcc_fneg_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_cndmask_b32_e64 v0, s3, -v2, vcc_lo
+; GFX10-NEXT: ; return to shader part epilog
%cmp = fcmp oeq float %cmp0, %cmp1
%neg.src0 = fneg float %src0
%result = select i1 %cmp, float %neg.src0, float %src1
@@ -297,122 +185,73 @@ define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inre
; Constant bus used by vcc
define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) {
- ; GFX9-LABEL: name: amdgcn_div_fmas_sss
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $vgpr0
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]]
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1)
- ; GFX9: $vgpr0 = COPY [[INT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: amdgcn_div_fmas_sss
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $vgpr0
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]]
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1)
- ; GFX10: $vgpr0 = COPY [[INT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: amdgcn_div_fmas_sss:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: s_nop 2
+; GFX9-NEXT: v_div_fmas_f32 v0, v0, v0, v0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: amdgcn_div_fmas_sss:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
+; GFX10-NEXT: v_div_fmas_f32 v0, s2, s2, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%vcc = fcmp oeq float %cmp.src, 0.0
%result = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %vcc)
ret float %result
}
define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
- ; GFX9-LABEL: name: class_s_s
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
- ; GFX9: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: class_s_s
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
- ; GFX10: $vgpr0 = COPY [[SELECT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: class_s_s:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: class_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_class_f32_e64 s0, s2, s3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
+; GFX10-NEXT: ; return to shader part epilog
%class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1)
%result = select i1 %class, float 1.0, float 0.0
ret float %result
}
define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) {
- ; GFX9-LABEL: name: div_scale_s_s_true
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1
- ; GFX9: $vgpr0 = COPY [[INT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: div_scale_s_s_true
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1
- ; GFX10: $vgpr0 = COPY [[INT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: div_scale_s_s_true:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_div_scale_f32 v0, s[0:1], s2, v0, s2
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: div_scale_s_s_true:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s3, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true)
%result = extractvalue { float, i1 } %div.scale, 0
ret float %result
}
define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) {
- ; GFX9-LABEL: name: div_scale_s_s_false
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9: liveins: $sgpr2, $sgpr3
- ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0
- ; GFX9: $vgpr0 = COPY [[INT]](s32)
- ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
- ; GFX10-LABEL: name: div_scale_s_s_false
- ; GFX10: bb.1 (%ir-block.0):
- ; GFX10: liveins: $sgpr2, $sgpr3
- ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0
- ; GFX10: $vgpr0 = COPY [[INT]](s32)
- ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+; GFX9-LABEL: div_scale_s_s_false:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s2
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: div_scale_s_s_false:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_div_scale_f32 v0, s0, s3, s3, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
%div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false)
%result = extractvalue { float, i1 } %div.scale, 0
ret float %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
index 8cba08f016da..4193d976afd6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
@@ -1,5 +1,4 @@
-; FIXME: Broken SI run line
-; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
index 28c2c7a4e9bf..e2c3b625395a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
@@ -1,5 +1,4 @@
-; FIXME: Broken SI run line
-; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index 5389adf5a526..7d116f8e8925 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -687,14 +687,48 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs
ret void
}
-; FIXME
-; define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-; %src = load i32, i32 addrspace(1)* %in, align 4
-; %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
-; %div = sdiv i32 %bfe, 2
-; store i32 %div, i32 addrspace(1)* %out, align 4
-; ret void
-; }
+define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; GFX6-LABEL: simplify_demanded_bfe_sdiv:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 2
+; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT: s_mov_b32 s6, -1
+; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_load_dword s2, s[2:3], 0x0
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: v_mul_lo_u32 v1, -2, v0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_bfe_i32 s2, s2, 0x100001
+; GFX6-NEXT: s_ashr_i32 s3, s2, 31
+; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX6-NEXT: s_add_i32 s2, s2, s3
+; GFX6-NEXT: s_xor_b32 s2, s2, s3
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
+; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_xor_b32_e32 v0, s3, v0
+; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
+; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: s_endpgm
+ %src = load i32, i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
+ %div = sdiv i32 %bfe, 2
+ store i32 %div, i32 addrspace(1)* %out, align 4
+ ret void
+}
define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: bfe_0_width:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index a8631a18de3c..2512aaaeb082 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -15,14 +15,58 @@ entry:
ret i32 %r0.val
}
-; FIXME:
-; define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
-; entry:
-; %xor = xor <2 x i16> %a, %b
-; %r0.val = xor <2 x i16> %xor, <i16 -1, i16 -1>
-; %cast = bitcast <2 x i16> %r0.val to i32
-; ret i32 %cast
-; }
+; FIXME: fails to match
+define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GFX7-LABEL: scalar_xnor_v2i16_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_mov_b32 s4, 0xffff
+; GFX7-NEXT: s_lshl_b32 s1, s1, 16
+; GFX7-NEXT: s_and_b32 s0, s0, s4
+; GFX7-NEXT: s_or_b32 s0, s1, s0
+; GFX7-NEXT: s_lshl_b32 s1, s3, 16
+; GFX7-NEXT: s_and_b32 s2, s2, s4
+; GFX7-NEXT: s_or_b32 s1, s1, s2
+; GFX7-NEXT: s_xor_b32 s0, s0, s1
+; GFX7-NEXT: s_xor_b32 s0, s0, -1
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: scalar_xnor_v2i16_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_mov_b32 s2, 0xffff
+; GFX8-NEXT: s_lshr_b32 s5, s0, 16
+; GFX8-NEXT: s_lshr_b32 s6, s1, 16
+; GFX8-NEXT: s_and_b32 s4, s0, s2
+; GFX8-NEXT: s_and_b32 s0, s1, s2
+; GFX8-NEXT: s_and_b32 s5, s5, s2
+; GFX8-NEXT: s_and_b32 s1, s6, s2
+; GFX8-NEXT: s_mov_b32 s3, s2
+; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
+; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-NEXT: s_lshl_b32 s1, s1, 16
+; GFX8-NEXT: s_and_b32 s0, s0, s2
+; GFX8-NEXT: s_or_b32 s0, s1, s0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: scalar_xnor_v2i16_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_pack_ll_b32_b16 s2, -1, -1
+; GFX900-NEXT: s_xor_b32 s0, s0, s1
+; GFX900-NEXT: s_xor_b32 s0, s0, s2
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: scalar_xnor_v2i16_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_pack_ll_b32_b16 s2, -1, -1
+; GFX906-NEXT: s_xor_b32 s0, s0, s1
+; GFX906-NEXT: s_xor_b32 s0, s0, s2
+; GFX906-NEXT: ; return to shader part epilog
+entry:
+ %xor = xor <2 x i16> %a, %b
+ %r0.val = xor <2 x i16> %xor, <i16 -1, i16 -1>
+ %cast = bitcast <2 x i16> %r0.val to i32
+ ret i32 %cast
+}
define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) {
; GCN-LABEL: scalar_xnor_i32_mul_use:
@@ -51,13 +95,79 @@ define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) {
ret i64 %r0.val
}
-; FIXME:
-; define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) {
-; %xor = xor <4 x i16> %a, %b
-; %ret = xor <4 x i16> %xor, <i16 -1, i16 -1, i16 -1, i16 -1>
-; %cast = bitcast <4 x i16> %ret to i64
-; ret i64 %cast
-; }
+; FIXME: fails to match
+define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) {
+; GFX7-LABEL: scalar_xnor_v4i16_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s8, 0xffff
+; GFX7-NEXT: s_lshl_b32 s1, s1, 16
+; GFX7-NEXT: s_and_b32 s0, s0, s8
+; GFX7-NEXT: s_or_b32 s0, s1, s0
+; GFX7-NEXT: s_lshl_b32 s1, s3, 16
+; GFX7-NEXT: s_and_b32 s2, s2, s8
+; GFX7-NEXT: s_or_b32 s1, s1, s2
+; GFX7-NEXT: s_and_b32 s3, s4, s8
+; GFX7-NEXT: s_lshl_b32 s2, s5, 16
+; GFX7-NEXT: s_or_b32 s2, s2, s3
+; GFX7-NEXT: s_lshl_b32 s3, s7, 16
+; GFX7-NEXT: s_and_b32 s4, s6, s8
+; GFX7-NEXT: s_or_b32 s3, s3, s4
+; GFX7-NEXT: s_mov_b32 s4, -1
+; GFX7-NEXT: s_mov_b32 s5, s4
+; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: scalar_xnor_v4i16_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, 0xffff
+; GFX8-NEXT: s_lshr_b32 s5, s0, 16
+; GFX8-NEXT: s_and_b32 s7, s5, s4
+; GFX8-NEXT: s_lshr_b32 s5, s1, 16
+; GFX8-NEXT: s_and_b32 s6, s0, s4
+; GFX8-NEXT: s_and_b32 s0, s1, s4
+; GFX8-NEXT: s_and_b32 s1, s5, s4
+; GFX8-NEXT: s_lshr_b32 s5, s2, 16
+; GFX8-NEXT: s_and_b32 s8, s2, s4
+; GFX8-NEXT: s_and_b32 s9, s5, s4
+; GFX8-NEXT: s_lshr_b32 s5, s3, 16
+; GFX8-NEXT: s_and_b32 s2, s3, s4
+; GFX8-NEXT: s_and_b32 s3, s5, s4
+; GFX8-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9]
+; GFX8-NEXT: s_mov_b32 s5, s4
+; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-NEXT: s_and_b64 s[2:3], s[6:7], s[4:5]
+; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
+; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX8-NEXT: s_xor_b64 s[6:7], s[0:1], s[4:5]
+; GFX8-NEXT: s_and_b32 s1, s2, s4
+; GFX8-NEXT: s_lshl_b32 s0, s3, 16
+; GFX8-NEXT: s_or_b32 s0, s0, s1
+; GFX8-NEXT: s_lshl_b32 s1, s7, 16
+; GFX8-NEXT: s_and_b32 s2, s6, s4
+; GFX8-NEXT: s_or_b32 s1, s1, s2
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: scalar_xnor_v4i16_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_pack_ll_b32_b16 s4, -1, -1
+; GFX900-NEXT: s_mov_b32 s5, s4
+; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: scalar_xnor_v4i16_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_pack_ll_b32_b16 s4, -1, -1
+; GFX906-NEXT: s_mov_b32 s5, s4
+; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX906-NEXT: ; return to shader part epilog
+ %xor = xor <4 x i16> %a, %b
+ %ret = xor <4 x i16> %xor, <i16 -1, i16 -1, i16 -1, i16 -1>
+ %cast = bitcast <4 x i16> %ret to i64
+ ret i64 %cast
+}
define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xnor_i64_mul_use: