[llvm] r254841 - [X86][FMA4] Explicitly set the domain of FMA4 float/double scalar instructions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 4 23:07:43 PST 2015
Author: rksimon
Date: Sat Dec 5 01:07:42 2015
New Revision: 254841
URL: http://llvm.org/viewvc/llvm-project?rev=254841&view=rev
Log:
[X86][FMA4] Explicitly set the domain of FMA4 float/double scalar instructions
Both were defaulting to the float domain - now matches the packed instructions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrFMA.td
llvm/trunk/test/CodeGen/X86/fma_patterns.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA.td?rev=254841&r1=254840&r2=254841&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td Sat Dec 5 01:07:42 2015
@@ -374,36 +374,23 @@ let isCodeGenOnly = 1, ForceDisassemble
} // isCodeGenOnly = 1
}
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
- fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
-defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
- fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
-defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
-defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
- X86Fnmadd, loadf32>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
-defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
- X86Fnmadd, loadf64>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
-defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
- X86Fnmsub, loadf32>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
-defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
- X86Fnmsub, loadf64>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
-
let ExeDomain = SSEPackedSingle in {
+ // Scalar Instructions
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+ defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+ defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+ // Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
@@ -419,6 +406,22 @@ let ExeDomain = SSEPackedSingle in {
}
let ExeDomain = SSEPackedDouble in {
+ // Scalar Instructions
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+ defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+ // Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
Modified: llvm/trunk/test/CodeGen/X86/fma_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns.ll?rev=254841&r1=254840&r2=254841&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns.ll Sat Dec 5 01:07:42 2015
@@ -1120,7 +1120,7 @@ define double @test_f64_fneg_fmul(double
;
; FMA4-LABEL: test_f64_fneg_fmul:
; FMA4: # BB#0:
-; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
More information about the llvm-commits
mailing list