[llvm] r339077 - AMDGPU: Handle some vector operations in isCanonicalized
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 6 15:45:51 PDT 2018
Author: arsenm
Date: Mon Aug 6 15:45:51 2018
New Revision: 339077
URL: http://llvm.org/viewvc/llvm-project?rev=339077&view=rev
Log:
AMDGPU: Handle some vector operations in isCanonicalized
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=339077&r1=339076&r2=339077&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Aug 6 15:45:51 2018
@@ -6843,6 +6843,26 @@ bool SITargetLowering::isCanonicalized(S
return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
}
+ case ISD::BUILD_VECTOR: {
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
+ return false;
+ }
+
+ return true;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
+ }
+ case ISD::UNDEF:
+ // Could be anything.
+ return false;
default:
return denormalsEnabledForType(Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll?rev=339077&r1=339076&r2=339077&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll Mon Aug 6 15:45:51 2018
@@ -743,6 +743,90 @@ define amdgpu_ps float @test_fold_canoni
ret float %canonicalized
}
+; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_v2f16:
+; GFX9-DAG: v_add_f16_e32
+; GFX9-DAG: v_mul_f16_e32
+; GFX9-NOT: v_max
+; GFX9-NOT: v_pk_max
+define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
+ %lo = extractelement <2 x half> %vec, i32 0
+ %hi = extractelement <2 x half> %vec, i32 1
+ %lo.op = fadd half %lo, 1.0
+ %hi.op = fmul half %lo, 4.0
+ %ins0 = insertelement <2 x half> undef, half %lo.op, i32 0
+ %ins1 = insertelement <2 x half> %ins0, half %hi.op, i32 1
+ %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
+ ret <2 x half> %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon1_v2f16:
+; GFX9: v_add_f16_e32
+; GFX9: v_pk_max
+define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %vec) {
+ %lo = extractelement <2 x half> %vec, i32 0
+ %lo.op = fadd half %lo, 1.0
+ %ins = insertelement <2 x half> %vec, half %lo.op, i32 0
+ %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
+ ret <2 x half> %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon0_v2f16:
+; GFX9: v_add_f16_sdwa
+; GFX9: v_pk_max
+define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %vec) {
+ %hi = extractelement <2 x half> %vec, i32 1
+ %hi.op = fadd half %hi, 1.0
+ %ins = insertelement <2 x half> %vec, half %hi.op, i32 1
+ %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
+ ret <2 x half> %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_extract_element_v2f16:
+; GFX9: s_waitcnt
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: s_setpc_b64
+define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
+ %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
+ %elt = extractelement <2 x half> %vec.op, i32 0
+ %canonicalized = call half @llvm.canonicalize.f16(half %elt)
+ ret half %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_v2f16:
+; GFX9: v_pk_mul_f16
+; GFX9: v_mul_f16_e32
+; GFX9-NOT: v_max
+; GFX9-NOT: v_pk_max
+define <2 x half> @v_test_canonicalize_insertelement_v2f16(<2 x half> %vec, half %val, i32 %idx) {
+ %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
+ %ins.op = fmul half %val, 8.0
+ %ins = insertelement <2 x half> %vec.op, half %ins.op, i32 %idx
+ %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
+ ret <2 x half> %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_vec_v2f16:
+; GFX9: v_mul_f16
+; GFX9: v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT: s_setpc_b64
+define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x half> %vec, half %val, i32 %idx) {
+ %ins.op = fmul half %val, 8.0
+ %ins = insertelement <2 x half> %vec, half %ins.op, i32 %idx
+ %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
+ ret <2 x half> %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_insval_v2f16:
+; GFX9: v_pk_mul_f16
+; GFX9: v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT: s_setpc_b64
+define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x half> %vec, half %val, i32 %idx) {
+ %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
+ %ins = insertelement <2 x half> %vec.op, half %val, i32 %idx
+ %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
+ ret <2 x half> %canonicalized
+}
+
; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; CHECK: .amd_amdgpu_isa
More information about the llvm-commits mailing list