[PATCH][AVX512] Add mem patterns for 512b FP rounds
Cameron McInally
cameron.mcinally at nyu.edu
Tue Feb 17 08:23:19 PST 2015
Hey guys,
Attached is a patch that adds patterns for the 512-bit FP rounding operations so that a load feeding the round is folded into the instruction.
I'm somewhat unhappy with the verbosity of this patch, but couldn't
find a more compact representation. Any suggestions?
Thanks,
Cameron
-------------- next part --------------
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td (revision 229497)
+++ lib/Target/X86/X86InstrAVX512.td (working copy)
@@ -4683,25 +4683,45 @@
(VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
}
+def : Pat<(v16f32 (ffloor (loadv16f32 addr:$src))), // ffloor whose operand is a v16f32 load
+ (VRNDSCALEPSZm addr:$src, (i32 0x1))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint (loadv16f32 addr:$src))), // fnearbyint whose operand is a v16f32 load
+ (VRNDSCALEPSZm addr:$src, (i32 0xC))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil (loadv16f32 addr:$src))), // fceil whose operand is a v16f32 load
+ (VRNDSCALEPSZm addr:$src, (i32 0x2))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v16f32 (fceil VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint (loadv16f32 addr:$src))), // frint whose operand is a v16f32 load
+ (VRNDSCALEPSZm addr:$src, (i32 0x4))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc (loadv16f32 addr:$src))), // ftrunc whose operand is a v16f32 load
+ (VRNDSCALEPSZm addr:$src, (i32 0x3))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
+def : Pat<(v8f64 (ffloor (loadv8f64 addr:$src))), // ffloor whose operand is a v8f64 load
+ (VRNDSCALEPDZm addr:$src, (i32 0x1))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint (loadv8f64 addr:$src))), // fnearbyint whose operand is a v8f64 load
+ (VRNDSCALEPDZm addr:$src, (i32 0xC))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil (loadv8f64 addr:$src))), // fceil whose operand is a v8f64 load
+ (VRNDSCALEPDZm addr:$src, (i32 0x2))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v8f64 (fceil VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint (loadv8f64 addr:$src))), // frint whose operand is a v8f64 load
+ (VRNDSCALEPDZm addr:$src, (i32 0x4))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc (loadv8f64 addr:$src))), // ftrunc whose operand is a v8f64 load
+ (VRNDSCALEPDZm addr:$src, (i32 0x3))>; // memory form; same immediate as the register pattern that follows
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
Index: test/CodeGen/X86/avx512-round.ll
===================================================================
--- test/CodeGen/X86/avx512-round.ll (revision 229497)
+++ test/CodeGen/X86/avx512-round.ll (working copy)
@@ -6,6 +6,13 @@
%res = call <16 x float> @llvm.floor.v16f32(<16 x float> %a)
ret <16 x float> %res
}
+define <16 x float> @floor_v16f32_mem(<16 x float>* %ptr) { ; floor of a <16 x float> read through %ptr
+; CHECK-LABEL: floor_v16f32_mem
+; CHECK: vrndscaleps $1, ({{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0x07,0x01]
+ %vec = load <16 x float>* %ptr ; loaded value is used only by the call below
+ %rounded = call <16 x float> @llvm.floor.v16f32(<16 x float> %vec)
+ ret <16 x float> %rounded
+}
declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
define <8 x double> @floor_v8f64(<8 x double> %a) {
@@ -14,6 +21,13 @@
%res = call <8 x double> @llvm.floor.v8f64(<8 x double> %a)
ret <8 x double> %res
}
+define <8 x double> @floor_v8f64_mem(<8 x double>* %ptr) { ; floor of an <8 x double> read through %ptr
+; CHECK-LABEL: floor_v8f64_mem
+; CHECK: vrndscalepd $1, ({{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0x07,0x01]
+ %vec = load <8 x double>* %ptr ; loaded value is used only by the call below
+ %rounded = call <8 x double> @llvm.floor.v8f64(<8 x double> %vec)
+ ret <8 x double> %rounded
+}
declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
define <16 x float> @ceil_v16f32(<16 x float> %a) {
@@ -22,6 +36,13 @@
%res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %a)
ret <16 x float> %res
}
+define <16 x float> @ceil_v16f32_mem(<16 x float>* %ptr) { ; ceil of a <16 x float> read through %ptr
+; CHECK-LABEL: ceil_v16f32_mem
+; CHECK: vrndscaleps $2, ({{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0x07,0x02]
+ %vec = load <16 x float>* %ptr ; loaded value is used only by the call below
+ %rounded = call <16 x float> @llvm.ceil.v16f32(<16 x float> %vec)
+ ret <16 x float> %rounded
+}
declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
define <8 x double> @ceil_v8f64(<8 x double> %a) {
@@ -30,6 +51,13 @@
%res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %a)
ret <8 x double> %res
}
+define <8 x double> @ceil_v8f64_mem(<8 x double>* %ptr) { ; ceil of an <8 x double> read through %ptr
+; CHECK-LABEL: ceil_v8f64_mem
+; CHECK: vrndscalepd $2, ({{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0x07,0x02]
+ %a = load <8 x double>* %ptr ; loaded value is used only by the call below
+ %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %a)
+ ret <8 x double> %res
+}
declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
define <16 x float> @trunc_v16f32(<16 x float> %a) {
@@ -38,6 +66,13 @@
%res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %a)
ret <16 x float> %res
}
+define <16 x float> @trunc_v16f32_mem(<16 x float>* %ptr) { ; trunc of a <16 x float> read through %ptr
+; CHECK-LABEL: trunc_v16f32_mem
+; CHECK: vrndscaleps $3, ({{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0x07,0x03]
+ %vec = load <16 x float>* %ptr ; loaded value is used only by the call below
+ %rounded = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
+ ret <16 x float> %rounded
+}
declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
define <8 x double> @trunc_v8f64(<8 x double> %a) {
@@ -46,6 +81,13 @@
%res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %a)
ret <8 x double> %res
}
+define <8 x double> @trunc_v8f64_mem(<8 x double>* %ptr) { ; trunc of an <8 x double> read through %ptr
+; CHECK-LABEL: trunc_v8f64_mem
+; CHECK: vrndscalepd $3, ({{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0x07,0x03]
+ %vec = load <8 x double>* %ptr ; loaded value is used only by the call below
+ %rounded = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
+ ret <8 x double> %rounded
+}
declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
define <16 x float> @rint_v16f32(<16 x float> %a) {
@@ -54,6 +96,13 @@
%res = call <16 x float> @llvm.rint.v16f32(<16 x float> %a)
ret <16 x float> %res
}
+define <16 x float> @rint_v16f32_mem(<16 x float>* %ptr) { ; rint of a <16 x float> read through %ptr
+; CHECK-LABEL: rint_v16f32_mem
+; CHECK: vrndscaleps $4, ({{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0x07,0x04]
+ %vec = load <16 x float>* %ptr ; loaded value is used only by the call below
+ %rounded = call <16 x float> @llvm.rint.v16f32(<16 x float> %vec)
+ ret <16 x float> %rounded
+}
declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)
define <8 x double> @rint_v8f64(<8 x double> %a) {
@@ -62,6 +111,13 @@
%res = call <8 x double> @llvm.rint.v8f64(<8 x double> %a)
ret <8 x double> %res
}
+define <8 x double> @rint_v8f64_mem(<8 x double>* %ptr) { ; rint of an <8 x double> read through %ptr
+; CHECK-LABEL: rint_v8f64_mem
+; CHECK: vrndscalepd $4, ({{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0x07,0x04]
+ %vec = load <8 x double>* %ptr ; loaded value is used only by the call below
+ %rounded = call <8 x double> @llvm.rint.v8f64(<8 x double> %vec)
+ ret <8 x double> %rounded
+}
declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)
define <16 x float> @nearbyint_v16f32(<16 x float> %a) {
@@ -70,6 +126,13 @@
%res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %a)
ret <16 x float> %res
}
+define <16 x float> @nearbyint_v16f32_mem(<16 x float>* %ptr) { ; nearbyint of a <16 x float> read through %ptr
+; CHECK-LABEL: nearbyint_v16f32_mem
+; CHECK: vrndscaleps $12, ({{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0x07,0x0c]
+ %vec = load <16 x float>* %ptr ; loaded value is used only by the call below
+ %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %vec)
+ ret <16 x float> %rounded
+}
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
define <8 x double> @nearbyint_v8f64(<8 x double> %a) {
@@ -78,4 +141,11 @@
%res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %a)
ret <8 x double> %res
}
+define <8 x double> @nearbyint_v8f64_mem(<8 x double>* %ptr) { ; nearbyint of an <8 x double> read through %ptr
+; CHECK-LABEL: nearbyint_v8f64_mem
+; CHECK: vrndscalepd $12, ({{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0x07,0x0c]
+ %vec = load <8 x double>* %ptr ; loaded value is used only by the call below
+ %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %vec)
+ ret <8 x double> %rounded
+}
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
More information about the llvm-commits
mailing list