[PATCH 1/1] R600: Expand vector fceil.i32
Tom Stellard
tom at stellard.net
Wed Jun 18 08:50:27 PDT 2014
On Wed, Jun 18, 2014 at 11:27:20AM -0400, Jan Vesely wrote:
> Move previous fp64 fceil tests to fceil64.ll
>
I'm a little confused by the commit message. Why is it fceil.i32
and not plain fceil? Otherwise, LGTM.
-Tom
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 1 +
> test/CodeGen/R600/fceil.ll | 181 +++++++++++++++++++++------------
> test/CodeGen/R600/fceil64.ll | 84 +++++++++++++++
> 3 files changed, 199 insertions(+), 67 deletions(-)
> create mode 100644 test/CodeGen/R600/fceil64.ll
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 99aceeb..0422657 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -316,6 +316,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> for (MVT VT : FloatVectorTypes) {
> setOperationAction(ISD::FABS, VT, Expand);
> setOperationAction(ISD::FADD, VT, Expand);
> + setOperationAction(ISD::FCEIL, VT, Expand);
> setOperationAction(ISD::FCOS, VT, Expand);
> setOperationAction(ISD::FDIV, VT, Expand);
> setOperationAction(ISD::FPOW, VT, Expand);
> diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll
> index b8b945f..458363a 100644
> --- a/test/CodeGen/R600/fceil.ll
> +++ b/test/CodeGen/R600/fceil.ll
> @@ -1,84 +1,131 @@
> -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
>
> -declare double @llvm.ceil.f64(double) nounwind readnone
> -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
> -declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
> -declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
> -declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
> -declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
> +declare float @llvm.ceil.f32(float) nounwind readnone
> +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
> +declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
> +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
> +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
> +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
>
> -; CI-LABEL: @fceil_f64:
> -; CI: V_CEIL_F64_e32
> -define void @fceil_f64(double addrspace(1)* %out, double %x) {
> - %y = call double @llvm.ceil.f64(double %x) nounwind readnone
> - store double %y, double addrspace(1)* %out
> +; FUNC-LABEL: @fceil_f32:
> +; SI: V_CEIL_F32_e32
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +define void @fceil_f32(float addrspace(1)* %out, float %x) {
> + %y = call float @llvm.ceil.f32(float %x) nounwind readnone
> + store float %y, float addrspace(1)* %out
> ret void
> }
>
> -; CI-LABEL: @fceil_v2f64:
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
> - %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
> - store <2 x double> %y, <2 x double> addrspace(1)* %out
> +; FUNC-LABEL: @fceil_v2f32:
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
> + %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
> + store <2 x float> %y, <2 x float> addrspace(1)* %out
> ret void
> }
>
> -; FIXME-CI-LABEL: @fceil_v3f64:
> -; FIXME-CI: V_CEIL_F64_e32
> -; FIXME-CI: V_CEIL_F64_e32
> -; FIXME-CI: V_CEIL_F64_e32
> -; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
> -; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
> -; store <3 x double> %y, <3 x double> addrspace(1)* %out
> -; ret void
> -; }
> +; FUNC-LABEL: @fceil_v3f32:
> +; FIXME-SI: V_CEIL_F32_e32
> +; FIXME-SI: V_CEIL_F32_e32
> +; FIXME-SI: V_CEIL_F32_e32
> +; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
> + %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
> + store <3 x float> %y, <3 x float> addrspace(1)* %out
> + ret void
> +}
>
> -; CI-LABEL: @fceil_v4f64:
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
> - %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
> - store <4 x double> %y, <4 x double> addrspace(1)* %out
> +; FUNC-LABEL: @fceil_v4f32:
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +; EG: CEIL {{\*? *}}[[RESULT]]
> +define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
> + %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
> + store <4 x float> %y, <4 x float> addrspace(1)* %out
> ret void
> }
>
> -; CI-LABEL: @fceil_v8f64:
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
> - %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
> - store <8 x double> %y, <8 x double> addrspace(1)* %out
> +; FUNC-LABEL: @fceil_v8f32:
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
> + %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
> + store <8 x float> %y, <8 x float> addrspace(1)* %out
> ret void
> }
>
> -; CI-LABEL: @fceil_v16f64:
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -; CI: V_CEIL_F64_e32
> -define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
> - %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
> - store <16 x double> %y, <16 x double> addrspace(1)* %out
> +; FUNC-LABEL: @fceil_v16f32:
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; SI: V_CEIL_F32_e32
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
> +; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
> +define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
> + %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
> + store <16 x float> %y, <16 x float> addrspace(1)* %out
> ret void
> }
> diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll
> new file mode 100644
> index 0000000..b8b945f
> --- /dev/null
> +++ b/test/CodeGen/R600/fceil64.ll
> @@ -0,0 +1,84 @@
> +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
> +
> +declare double @llvm.ceil.f64(double) nounwind readnone
> +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
> +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
> +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
> +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
> +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
> +
> +; CI-LABEL: @fceil_f64:
> +; CI: V_CEIL_F64_e32
> +define void @fceil_f64(double addrspace(1)* %out, double %x) {
> + %y = call double @llvm.ceil.f64(double %x) nounwind readnone
> + store double %y, double addrspace(1)* %out
> + ret void
> +}
> +
> +; CI-LABEL: @fceil_v2f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
> + %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
> + store <2 x double> %y, <2 x double> addrspace(1)* %out
> + ret void
> +}
> +
> +; FIXME-CI-LABEL: @fceil_v3f64:
> +; FIXME-CI: V_CEIL_F64_e32
> +; FIXME-CI: V_CEIL_F64_e32
> +; FIXME-CI: V_CEIL_F64_e32
> +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
> +; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
> +; store <3 x double> %y, <3 x double> addrspace(1)* %out
> +; ret void
> +; }
> +
> +; CI-LABEL: @fceil_v4f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
> + %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
> + store <4 x double> %y, <4 x double> addrspace(1)* %out
> + ret void
> +}
> +
> +; CI-LABEL: @fceil_v8f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
> + %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
> + store <8 x double> %y, <8 x double> addrspace(1)* %out
> + ret void
> +}
> +
> +; CI-LABEL: @fceil_v16f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
> + %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
> + store <16 x double> %y, <16 x double> addrspace(1)* %out
> + ret void
> +}
> --
> 1.9.3
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list