[PATCH] R600/SI: Fix operand types for v_sub_f64 pseudo
Tom Stellard
tom at stellard.net
Tue Feb 17 17:38:57 PST 2015
On Tue, Feb 17, 2015 at 11:55:13PM +0000, Matt Arsenault wrote:
> Fully remove pseudo. Depends on fmad patches for some tests to not fail
>
LGTM.
>
> http://reviews.llvm.org/D7108
>
> Files:
> lib/Target/R600/AMDGPUISelLowering.cpp
> lib/Target/R600/SIISelLowering.cpp
> lib/Target/R600/SIInstructions.td
> test/CodeGen/R600/fneg-fabs.f64.ll
> test/CodeGen/R600/fneg.f64.ll
> test/CodeGen/R600/fsub64.ll
>
> EMAIL PREFERENCES
> http://reviews.llvm.org/settings/panel/emailpreferences/
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -137,6 +137,9 @@
> if (!Subtarget->hasFP32Denormals())
> setOperationAction(ISD::FMAD, MVT::f32, Legal);
>
> + // Expand to fneg + fadd.
> + setOperationAction(ISD::FSUB, MVT::f64, Expand);
> +
> // Lower floating point store/load to integer store/load to reduce the number
> // of patterns in tablegen.
> setOperationAction(ISD::STORE, MVT::f32, Promote);
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -605,19 +605,8 @@
> switch (MI->getOpcode()) {
> default:
> return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
> - case AMDGPU::BRANCH: return BB;
> - case AMDGPU::V_SUB_F64: {
> - unsigned DestReg = MI->getOperand(0).getReg();
> - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
> - .addImm(0) // SRC0 modifiers
> - .addReg(MI->getOperand(1).getReg())
> - .addImm(1) // SRC1 modifiers
> - .addReg(MI->getOperand(2).getReg())
> - .addImm(0) // CLAMP
> - .addImm(0); // OMOD
> - MI->eraseFromParent();
> - break;
> - }
> + case AMDGPU::BRANCH:
> + return BB;
> case AMDGPU::SI_RegisterStorePseudo: {
> MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -1947,17 +1947,6 @@
>
> } // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
>
> -let usesCustomInserter = 1 in {
> -
> -def V_SUB_F64 : InstSI <
> - (outs VReg_64:$dst),
> - (ins VReg_64:$src0, VReg_64:$src1),
> - "v_sub_f64 $dst, $src0, $src1",
> - [(set f64:$dst, (fsub f64:$src0, f64:$src1))]
> ->;
> -
> -} // end usesCustomInserter
> -
> multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
>
> let UseNamedOperandTable = 1 in {
> Index: test/CodeGen/R600/fneg-fabs.f64.ll
> ===================================================================
> --- test/CodeGen/R600/fneg-fabs.f64.ll
> +++ test/CodeGen/R600/fneg-fabs.f64.ll
> @@ -5,9 +5,7 @@
> ; into 2 modifiers, although theoretically that should work.
>
> ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
> -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
> -; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
> -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
> define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
> %fabs = call double @llvm.fabs.f64(double %x)
> %fsub = fsub double -0.000000e+00, %fabs
> Index: test/CodeGen/R600/fneg.f64.ll
> ===================================================================
> --- test/CodeGen/R600/fneg.f64.ll
> +++ test/CodeGen/R600/fneg.f64.ll
> @@ -39,8 +39,7 @@
> ; unless the target returns true for isNegFree()
>
> ; FUNC-LABEL: {{^}}fneg_free_f64:
> -; FIXME: Unnecessary copy to VGPRs
> -; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
> +; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}}
> define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
> %bc = bitcast i64 %in to double
> %fsub = fsub double 0.0, %bc
> Index: test/CodeGen/R600/fsub64.ll
> ===================================================================
> --- test/CodeGen/R600/fsub64.ll
> +++ test/CodeGen/R600/fsub64.ll
> @@ -1,13 +1,107 @@
> ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
> ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
>
> +declare double @llvm.fabs.f64(double) #0
> +
> ; SI-LABEL: {{^}}fsub_f64:
> ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> double addrspace(1)* %in2) {
> - %r0 = load double addrspace(1)* %in1
> - %r1 = load double addrspace(1)* %in2
> - %r2 = fsub double %r0, %r1
> - store double %r2, double addrspace(1)* %out
> - ret void
> + %r0 = load double addrspace(1)* %in1
> + %r1 = load double addrspace(1)* %in2
> + %r2 = fsub double %r0, %r1
> + store double %r2, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}fsub_fabs_f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
> +define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> + double addrspace(1)* %in2) {
> + %r0 = load double addrspace(1)* %in1
> + %r1 = load double addrspace(1)* %in2
> + %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
> + %r2 = fsub double %r0, %r1.fabs
> + store double %r2, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}fsub_fabs_inv_f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, -v\[[0-9]+:[0-9]+\]}}
> +define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> + double addrspace(1)* %in2) {
> + %r0 = load double addrspace(1)* %in1
> + %r1 = load double addrspace(1)* %in2
> + %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
> + %r2 = fsub double %r0.fabs, %r1
> + store double %r2, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}s_fsub_f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
> + %sub = fsub double %a, %b
> + store double %sub, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}s_fsub_imm_f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}}
> +define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
> + %sub = fsub double 4.0, %a
> + store double %sub, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}}
> +define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
> + %sub = fsub double %a, 4.0
> + store double %sub, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}s_fsub_self_f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}
> +define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) {
> + %sub = fsub double %a, %a
> + store double %sub, double addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}fsub_v2f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) {
> + %sub = fsub <2 x double> %a, %b
> + store <2 x double> %sub, <2 x double> addrspace(1)* %out
> + ret void
> +}
> +
> +; SI-LABEL: {{^}}fsub_v4f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
> + %b_ptr = getelementptr <4 x double> addrspace(1)* %in, i32 1
> + %a = load <4 x double> addrspace(1)* %in
> + %b = load <4 x double> addrspace(1)* %b_ptr
> + %result = fsub <4 x double> %a, %b
> + store <4 x double> %result, <4 x double> addrspace(1)* %out
> + ret void
> }
> +
> +; SI-LABEL: {{^}}s_fsub_v4f64:
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
> +define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) {
> + %result = fsub <4 x double> %a, %b
> + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16
> + ret void
> +}
> +
> +attributes #0 = { nounwind readnone }
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list